diff --git a/CMake/CommonInterfaces.cmake b/CMake/CommonInterfaces.cmake index f4d6f8758..4feae739a 100644 --- a/CMake/CommonInterfaces.cmake +++ b/CMake/CommonInterfaces.cmake @@ -21,7 +21,6 @@ endif() # Variables assumed to have been set in parent scope: # INTERFACE_SRC # INTERFACE_INCLUDE -# INTERFACE_TCC # INTERFACE_MAIN (the main .i file) # INTERFACE_FILES (the other .i files) # INTERFACE_DEPENDENCIES @@ -57,12 +56,11 @@ macro(generate_interface_module MODULE_NAME TARGET_NAME LANGUAGE_NAME LANGUAGE_D source_group("src" FILES ${INTERFACE_SRC}) source_group("include" FILES ${INTERFACE_INCLUDE}) - source_group("tcc" FILES ${INTERFACE_TCC}) source_group("interface" FILES ${INTERFACE_MAIN} ${INTERFACE_FILES}) if(${language} STREQUAL "common") find_file(THIS_FILE_PATH CommonInterfaces.cmake PATHS ${CMAKE_MODULE_PATH}) - add_custom_target(${module_name} DEPENDS ${INTERFACE_SRC} ${INTERFACE_INCLUDE} ${INTERFACE_TCC} ${INTERFACE_MAIN} ${INTERFACE_FILES} SOURCES ${INTERFACE_SRC} ${INTERFACE_INCLUDE} ${INTERFACE_TCC} ${INTERFACE_MAIN} ${INTERFACE_FILES} ${THIS_FILE_PATH}) + add_custom_target(${module_name} DEPENDS ${INTERFACE_SRC} ${INTERFACE_INCLUDE} ${INTERFACE_MAIN} ${INTERFACE_FILES} SOURCES ${INTERFACE_SRC} ${INTERFACE_INCLUDE} ${INTERFACE_MAIN} ${INTERFACE_FILES} ${THIS_FILE_PATH}) # Make interface code be dependent on all libraries add_dependencies(${module_name} ${INTERFACE_DEPENDENCIES}) @@ -71,7 +69,7 @@ macro(generate_interface_module MODULE_NAME TARGET_NAME LANGUAGE_NAME LANGUAGE_D include_directories(${EXTRA_INCLUDE_PATHS}) - foreach(file ${INTERFACE_FILES} ${INTERFACE_SRC} ${INTERFACE_INCLUDE} ${INTERFACE_TCC}) + foreach(file ${INTERFACE_FILES} ${INTERFACE_SRC} ${INTERFACE_INCLUDE}) configure_file(${file} ${file} COPYONLY) endforeach() @@ -94,13 +92,13 @@ macro(generate_interface_module MODULE_NAME TARGET_NAME LANGUAGE_NAME LANGUAGE_D add_definitions(-DSWIG_DIRECTOR_NO_UEH) endif() - set(SWIG_MODULE_${module_name}_EXTRA_DEPS ${INTERFACE_FILES} ${INTERFACE_SRC} ${INTERFACE_INCLUDE} ${INTERFACE_TCC} ${EXTRA_INTERFACE}) + set(SWIG_MODULE_${module_name}_EXTRA_DEPS ${INTERFACE_FILES} ${INTERFACE_SRC} ${INTERFACE_INCLUDE} ${EXTRA_INTERFACE}) foreach(file ${INTERFACE_INCLUDE} ${INTERFACE_SRC}) set_source_files_properties(${INTERFACE_MAIN} PROPERTIES OBJECT_DEPENDS ${file}) endforeach() - foreach(file ${INTERFACE_INCLUDE} ${INTERFACE_TCC}) + foreach(file ${INTERFACE_INCLUDE}) set_source_files_properties(${INTERFACE_MAIN} PROPERTIES OBJECT_DEPENDS ${file}) endforeach() @@ -153,7 +151,6 @@ endmacro() # generate_interface_module # Variables assumed to have been set in parent scope: # INTERFACE_SRC # INTERFACE_INCLUDE -# INTERFACE_TCC # INTERFACE_MAIN (the main .i file) # INTERFACE_FILES (the other .i files) # INTERFACE_DEPENDENCIES @@ -162,5 +159,3 @@ endmacro() # generate_interface_module macro(generate_interface LANGUAGE_NAME MODULE_NAME LANGUAGE_DIR LANGUAGE_LIBRARIES EXTRA_INTERFACE) generate_interface_module("ELL_${LANGUAGE_NAME}" "${MODULE_NAME}" "${LANGUAGE_NAME}" "${LANGUAGE_DIR}" "${LANGUAGE_LIBRARIES}" "${EXTRA_INTERFACE}") endmacro() - -# diff --git a/CMakeLists.txt b/CMakeLists.txt index 3315e2c5d..6f0923122 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,6 +117,7 @@ else() add_compile_options(-Wno-missing-braces) add_compile_options(-Wmissing-field-initializers) add_compile_options(-fvisibility-inlines-hidden) + add_compile_options(-Wno-unknown-pragmas) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb3 -O0") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -ggdb3 
-O0") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -ggdb3") diff --git a/Doxyfile b/Doxyfile index ca18fd9c6..c1e630928 100644 --- a/Doxyfile +++ b/Doxyfile @@ -38,20 +38,20 @@ PROJECT_NAME = ELL # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = +PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. -PROJECT_BRIEF = +PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. -PROJECT_LOGO = +PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is @@ -171,7 +171,7 @@ STRIP_FROM_PATH = @CMAKE_CURRENT_SOURCE_DIR@ # specify the list of include paths that are normally passed to the compiler # using the -I flag. -STRIP_FROM_INC_PATH = +STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't @@ -238,13 +238,13 @@ TAB_SIZE = 4 # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. -ALIASES = +ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. -TCL_SUBST = +TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For @@ -291,7 +291,7 @@ OPTIMIZE_OUTPUT_VHDL = NO # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. -EXTENSION_MAPPING = +EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable @@ -639,7 +639,7 @@ GENERATE_DEPRECATEDLIST= YES # sections, marked by \if ... \endif and \cond # ... \endcond blocks. -ENABLED_SECTIONS = +ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the @@ -681,7 +681,7 @@ SHOW_NAMESPACES = YES # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. -FILE_VERSION_FILTER = +FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated @@ -694,7 +694,7 @@ FILE_VERSION_FILTER = # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. -LAYOUT_FILE = +LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib @@ -704,7 +704,7 @@ LAYOUT_FILE = # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. 
See also \cite for info how to create references. -CITE_BIB_FILES = +CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages @@ -848,8 +848,7 @@ FILE_PATTERNS = *.c \ *.ucf \ *.qsf \ *.as \ - *.js \ - *.tcc + *.js # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. @@ -864,7 +863,7 @@ RECURSIVE = YES # Note that relative paths are relative to the directory from which doxygen is # run. -EXCLUDE = +EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded @@ -892,13 +891,13 @@ EXCLUDE_PATTERNS = */*/test/* \ # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = +EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). -EXAMPLE_PATH = +EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and @@ -918,7 +917,7 @@ EXAMPLE_RECURSIVE = NO # that contain images that are to be included in the documentation (see the # \image command). -IMAGE_PATH = +IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program @@ -939,7 +938,7 @@ IMAGE_PATH = # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. -INPUT_FILTER = +INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the @@ -952,7 +951,7 @@ INPUT_FILTER = # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. -FILTER_PATTERNS = +FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for @@ -967,14 +966,14 @@ FILTER_SOURCE_FILES = NO # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. -FILTER_SOURCE_PATTERNS = +FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. -USE_MDFILE_AS_MAINPAGE = +USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing @@ -1079,7 +1078,7 @@ CLANG_ASSISTED_PARSING = NO # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. -CLANG_OPTIONS = +CLANG_OPTIONS = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index @@ -1105,7 +1104,7 @@ COLS_IN_ALPHA_INDEX = 5 # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. 
-IGNORE_PREFIX = +IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output @@ -1149,7 +1148,7 @@ HTML_FILE_EXTENSION = .html # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_HEADER = +HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard @@ -1159,7 +1158,7 @@ HTML_HEADER = # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_FOOTER = +HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of @@ -1171,7 +1170,7 @@ HTML_FOOTER = # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_STYLESHEET = +HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets @@ -1184,7 +1183,7 @@ HTML_STYLESHEET = # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_STYLESHEET = +HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note @@ -1194,7 +1193,7 @@ HTML_EXTRA_STYLESHEET = # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_FILES = +HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to @@ -1323,7 +1322,7 @@ GENERATE_HTMLHELP = NO # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. -CHM_FILE = +CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, @@ -1331,7 +1330,7 @@ CHM_FILE = # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. -HHC_LOCATION = +HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). @@ -1344,7 +1343,7 @@ GENERATE_CHI = NO # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. -CHM_INDEX_ENCODING = +CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it @@ -1375,7 +1374,7 @@ GENERATE_QHP = NO # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. -QCH_FILE = +QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace @@ -1400,7 +1399,7 @@ QHP_VIRTUAL_FOLDER = doc # filters). # This tag requires that the tag GENERATE_QHP is set to YES. -QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom @@ -1408,21 +1407,21 @@ QHP_CUST_FILTER_NAME = # filters). 
# This tag requires that the tag GENERATE_QHP is set to YES. -QHP_CUST_FILTER_ATTRS = +QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. -QHP_SECT_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. -QHG_LOCATION = +QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To @@ -1555,7 +1554,7 @@ MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. -MATHJAX_EXTENSIONS = +MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site @@ -1563,7 +1562,7 @@ MATHJAX_EXTENSIONS = # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. -MATHJAX_CODEFILE = +MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and @@ -1623,7 +1622,7 @@ EXTERNAL_SEARCH = NO # Searching" for details. # This tag requires that the tag SEARCHENGINE is set to YES. -SEARCHENGINE_URL = +SEARCHENGINE_URL = # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the @@ -1639,7 +1638,7 @@ SEARCHDATA_FILE = searchdata.xml # projects and redirect the results back to the right project. # This tag requires that the tag SEARCHENGINE is set to YES. -EXTERNAL_SEARCH_ID = +EXTERNAL_SEARCH_ID = # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are @@ -1649,7 +1648,7 @@ EXTERNAL_SEARCH_ID = # EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ... # This tag requires that the tag SEARCHENGINE is set to YES. -EXTRA_SEARCH_MAPPINGS = +EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # Configuration options related to the LaTeX output @@ -1713,7 +1712,7 @@ PAPER_TYPE = a4 # If left blank no extra packages will be included. # This tag requires that the tag GENERATE_LATEX is set to YES. -EXTRA_PACKAGES = +EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for the # generated LaTeX document. The header should contain everything until the first @@ -1729,7 +1728,7 @@ EXTRA_PACKAGES = # to HTML_HEADER. # This tag requires that the tag GENERATE_LATEX is set to YES. -LATEX_HEADER = +LATEX_HEADER = # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the # generated LaTeX document. The footer should contain everything after the last @@ -1740,7 +1739,7 @@ LATEX_HEADER = # Note: Only use a user-defined footer if you know what you are doing! # This tag requires that the tag GENERATE_LATEX is set to YES. 
-LATEX_FOOTER = +LATEX_FOOTER = # The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined # LaTeX style sheets that are included after the standard style sheets created @@ -1751,7 +1750,7 @@ LATEX_FOOTER = # list). # This tag requires that the tag GENERATE_LATEX is set to YES. -LATEX_EXTRA_STYLESHEET = +LATEX_EXTRA_STYLESHEET = # The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the LATEX_OUTPUT output @@ -1759,7 +1758,7 @@ LATEX_EXTRA_STYLESHEET = # markers available. # This tag requires that the tag GENERATE_LATEX is set to YES. -LATEX_EXTRA_FILES = +LATEX_EXTRA_FILES = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is # prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will @@ -1867,14 +1866,14 @@ RTF_HYPERLINKS = NO # default style sheet that doxygen normally uses. # This tag requires that the tag GENERATE_RTF is set to YES. -RTF_STYLESHEET_FILE = +RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an RTF document. Syntax is # similar to doxygen's config file. A template extensions file can be generated # using doxygen -e rtf extensionFile. # This tag requires that the tag GENERATE_RTF is set to YES. -RTF_EXTENSIONS_FILE = +RTF_EXTENSIONS_FILE = # If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code # with syntax highlighting in the RTF output. @@ -1919,7 +1918,7 @@ MAN_EXTENSION = .3 # MAN_EXTENSION with the initial . removed. # This tag requires that the tag GENERATE_MAN is set to YES. -MAN_SUBDIR = +MAN_SUBDIR = # If the MAN_LINKS tag is set to YES and doxygen generates man output, then it # will generate one additional man file for each entity documented in the real @@ -2032,7 +2031,7 @@ PERLMOD_PRETTY = YES # overwrite each other's variables. # This tag requires that the tag GENERATE_PERLMOD is set to YES. -PERLMOD_MAKEVAR_PREFIX = +PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor @@ -2073,7 +2072,7 @@ SEARCH_INCLUDES = YES # preprocessor. # This tag requires that the tag SEARCH_INCLUDES is set to YES. -INCLUDE_PATH = +INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the @@ -2081,7 +2080,7 @@ INCLUDE_PATH = # used. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -INCLUDE_FILE_PATTERNS = +INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that are # defined before the preprocessor is started (similar to the -D option of e.g. @@ -2091,7 +2090,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2100,7 +2099,7 @@ PREDEFINED = # definition found in the source code. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -EXPAND_AS_DEFINED = +EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will # remove all references to function-like macros that are alone on a line, have @@ -2129,13 +2128,13 @@ SKIP_FUNCTION_MACROS = YES # the path). 
If a tag file is not located in the directory in which doxygen is # run, you must also specify the path to the tagfile here. -TAGFILES = +TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create a # tag file that is based on the input files it reads. See section "Linking to # external documentation" for more information about the usage of tag files. -GENERATE_TAGFILE = +GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES, all external class will be listed in # the class index. If set to NO, only the inherited external classes will be @@ -2184,14 +2183,14 @@ CLASS_DIAGRAMS = YES # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. -MSCGEN_PATH = +MSCGEN_PATH = # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. # If left empty dia is assumed to be found in the default search path. -DIA_PATH = +DIA_PATH = # If set to YES the inheritance and collaboration graphs will hide inheritance # and usage relations if the target is undocumented or is not a class. @@ -2240,7 +2239,7 @@ DOT_FONTSIZE = 10 # the path where dot can find it using this tag. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTPATH = +DOT_FONTPATH = # If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for # each documented class showing the direct and indirect inheritance relations. @@ -2384,26 +2383,26 @@ INTERACTIVE_SVG = NO # found. If left blank, it is assumed the dot tool can be found in the path. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_PATH = +DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the \dotfile # command). # This tag requires that the tag HAVE_DOT is set to YES. -DOTFILE_DIRS = +DOTFILE_DIRS = # The MSCFILE_DIRS tag can be used to specify one or more directories that # contain msc files that are included in the documentation (see the \mscfile # command). -MSCFILE_DIRS = +MSCFILE_DIRS = # The DIAFILE_DIRS tag can be used to specify one or more directories that # contain dia files that are included in the documentation (see the \diafile # command). -DIAFILE_DIRS = +DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the # path where java can find the plantuml.jar file. If left blank, it is assumed @@ -2411,12 +2410,12 @@ DIAFILE_DIRS = # generate a warning when it encounters a \startuml command in this case and # will not generate output for the diagram. -PLANTUML_JAR_PATH = +PLANTUML_JAR_PATH = # When using plantuml, the specified paths are searched for files specified by # the !include statement in a plantuml block. -PLANTUML_INCLUDE_PATH = +PLANTUML_INCLUDE_PATH = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes # that will be shown in the graph. If the number of nodes in a graph becomes diff --git a/StyleGuide.md b/StyleGuide.md index 58e412895..5de815f88 100644 --- a/StyleGuide.md +++ b/StyleGuide.md @@ -1,14 +1,13 @@ # Embedded Learning Library (ELL) style guide -## Python +## Python We use [PEP 8](https://www.python.org/dev/peps/pep-0008/) with one exception, we extend the 80 char line limit to 120. -## C++ +## C++ ### File names and extensions -Header files use the extension ".h". 
Source code files use the extension ".cpp" if they are compiled into a .lib or .exe, and ".tcc" if they
-contain templated source code. Each file should typically contain a single class and its name should match the class name.
+Header files use the extension ".h". Definitions of functions that must live in the header should be moved to the end of the header file, in a region surrounded by `#pragma region implementation`/`#pragma endregion implementation` blocks. Source code files use the extension ".cpp" if they are compiled into a .lib or .exe. Each file should typically contain a single class, and its name should match the class name.
 
 ### Projects
 A project is a set of source files that compile into a single executable or library, or that implement a single template library.
@@ -27,7 +26,6 @@ Each project is contained in a directory. The directory contains a CMakeLists.tx
 
 * "include", for h files
 * "src" for cpp files (unless the project defines a template library without cpp files)
-* "tcc" for tcc files (these define implementations of template classes)
 * "test" for source files that define unit tests, invoked via ctest
 * "doc" for documentation that does not fit in the source files themselves. For example, complex math algorithms may require detailed documentation in a LaTeX file, which would live in the doc directory
 
@@ -108,14 +106,14 @@ in a comment after the `#endif`:
     #endif // MATRIX_H
 
 ### Function implementations
-Almost all function implementations belong in .cpp and .tcc files. The exception is short single-instruction implementations of
+Almost all function implementations belong in .cpp files and in the implementation region of .h files. The exception is short single-instruction implementations of
 parameterless functions, which should appear in the .h file on the same line as the function declaration. For example:
 
     double GetValue() const { return _value; } // inline implementation in .h file
 
-    double SetValue(double value); // function has parameters - implementation belongs in .cpp or .tcc file
+    double SetValue(double value); // function has parameters - implementation belongs in .cpp or the implementation region of the .h file
 
-    void Next() { _iterator++; _count++; } // wrong: multi-instruction implementations belong in .cpp or .tcc files
+    void Next() { _iterator++; _count++; } // wrong: multi-instruction implementations belong in .cpp or the implementation region of the .h file
 
     int GetIndex() // wrong: inline implementation should occupy a single line
     {
@@ -158,6 +156,6 @@ three slashes (///) and the first line in a documentation block should contain a
 
 ### Error Handling
 
-The c "assert" function usually results in immediate termination of the program, so this should only be used in cases where it should never happen unless there is a logic error in our code. To this end assert documents the existing invariants, preconditions and post conditions in the code.
+The C "assert" function usually results in immediate termination of the program, so it should only be used to guard conditions that cannot occur unless there is a logic error in our code. Used this way, assert documents the invariants, preconditions, and postconditions of the code.
 
-Bad input parameters from our public API or bad data read from a file should not result in this kind of assert termination because it is hard to debug, and it provides no useful information to the caller. Instead, throw the appropriate type of exception as defined in ~/libraries/utilities/include/Exception.h.
This includes the notImplemented case, which you can raise using throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
\ No newline at end of file
+Bad input parameters from our public API or bad data read from a file should not result in this kind of assert termination because it is hard to debug, and it provides no useful information to the caller. Instead, throw the appropriate type of exception as defined in ~/libraries/utilities/include/Exception.h. This includes the notImplemented case, which you can raise using throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
diff --git a/interfaces/CMakeLists.txt b/interfaces/CMakeLists.txt
index 0397a5e36..4a08569b4 100644
--- a/interfaces/CMakeLists.txt
+++ b/interfaces/CMakeLists.txt
@@ -36,14 +36,6 @@ foreach(src ${_sources})
     list(APPEND INTERFACE_INCLUDE "${CMAKE_CURRENT_LIST_DIR}/${src}")
 endforeach()
 
-set(_sources
-    common/tcc/CallbackInterface.tcc
-    common/tcc/ModelInterface.tcc
-)
-foreach(src ${_sources})
-    list(APPEND INTERFACE_TCC "${CMAKE_CURRENT_LIST_DIR}/${src}")
-endforeach()
-
 set(INTERFACE_MAIN "${CMAKE_CURRENT_LIST_DIR}/common/ell.i")
 
 set(_sources
@@ -112,7 +104,7 @@ set(INTERFACE_DEPENDENCIES
 
 # Add ELL library include directories
 include_directories(common/include)
-include_directories(../libraries/)
+include_directories(../libraries)
 
 add_subdirectory(common)
 add_subdirectory(python)
diff --git a/interfaces/common/CMakeLists.txt b/interfaces/common/CMakeLists.txt
index 3954f0fd1..a829438ff 100644
--- a/interfaces/common/CMakeLists.txt
+++ b/interfaces/common/CMakeLists.txt
@@ -8,7 +8,5 @@ set (INTERFACE_SRC src/ModelBuilderInterface.cpp
                    src/NeuralNetworkPredictorInterface.cpp
                    src/TrainerInterface.cpp
                    src/DatasetInterface.cpp)
-set (INTERFACE_TCC tcc/CallbackInterface.tcc
-                   tcc/ModelInterface.tcc)
 
 generate_interface(common ell ${CMAKE_CURRENT_SOURCE_DIR} "" "" "")
diff --git a/interfaces/common/include/CallbackInterface.h b/interfaces/common/include/CallbackInterface.h
index f582a1809..c68dc9298 100644
--- a/interfaces/common/include/CallbackInterface.h
+++ b/interfaces/common/include/CallbackInterface.h
@@ -118,4 +118,110 @@ namespace api
 } // namespace api
 } // namespace ell
 
-#include "../tcc/CallbackInterface.tcc"
+#pragma region implementation
+
+#ifndef SWIG
+
+#include <algorithm>
+#include <stdexcept>
+
+#endif
+
+namespace ell
+{
+namespace api
+{
+    //////////////////////////////////////////////////////////////////////////
+    // Api classes for callback forwarding
+    //////////////////////////////////////////////////////////////////////////
+
+    template <typename InputType, typename OutputType>
+    CallbackForwarder<InputType, OutputType>::CallbackForwarder() :
+        _inputCallback(nullptr),
+        _outputCallback(nullptr),
+        _lagCallback(nullptr)
+    {
+    }
+
+    template <typename InputType, typename OutputType>
+    void CallbackForwarder<InputType, OutputType>::Register(CallbackBase<InputType>& inputCallback,
+                                                            size_t inputSize,
+                                                            CallbackBase<OutputType>& outputCallback,
+                                                            size_t outputSize,
+                                                            CallbackBase<TimeTickType>& lagCallback)
+    {
+        // Caller owns the lifetime of these objects
+        _inputCallback = &inputCallback;
+        _outputCallback = &outputCallback;
+        _lagCallback = &lagCallback;
+
+        _inputBuffer.resize(inputSize);
+        _outputBuffer.resize(outputSize);
+    }
+
+    template <typename InputType, typename OutputType>
+    void CallbackForwarder<InputType, OutputType>::Clear()
+    {
+        _inputCallback = nullptr;
+        _outputCallback = nullptr;
+        _lagCallback = nullptr;
+
+        _inputBuffer.resize(0);
+        _outputBuffer.resize(0);
+    }
+
+    template <typename InputType, typename OutputType>
+    bool CallbackForwarder<InputType, OutputType>::InvokeInput(InputType* buffer)
+    {
+        if (_inputCallback == nullptr)
+        {
+            throw std::invalid_argument("Register has not yet been called");
+        }
+
+        bool result = _inputCallback->Run(_inputBuffer);
+        if (result)
+        {
+            // EFFICIENCY: any way to avoid the copy?
+            std::copy(_inputBuffer.begin(), _inputBuffer.end(), buffer);
+        }
+        return result;
+    }
+
+    template <typename InputType, typename OutputType>
+    void CallbackForwarder<InputType, OutputType>::InvokeOutput(const OutputType* buffer)
+    {
+        if (_outputCallback == nullptr)
+        {
+            throw std::invalid_argument("Register has not yet been called");
+        }
+
+        // EFFICIENCY: any way to avoid the copy?
+        _outputBuffer.assign(buffer, buffer + _outputBuffer.size());
+        _outputCallback->Run(_outputBuffer);
+    }
+
+    template <typename InputType, typename OutputType>
+    void CallbackForwarder<InputType, OutputType>::InvokeOutput(OutputType value)
+    {
+        if (_outputCallback == nullptr)
+        {
+            throw std::invalid_argument("Register has not yet been called");
+        }
+
+        _outputCallback->Run(value);
+    }
+
+    template <typename InputType, typename OutputType>
+    void CallbackForwarder<InputType, OutputType>::InvokeLagNotification(TimeTickType value)
+    {
+        if (_lagCallback == nullptr)
+        {
+            throw std::invalid_argument("Register has not yet been called");
+        }
+
+        _lagCallback->Run(value);
+    }
+} // namespace api
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/interfaces/common/include/ModelInterface.h b/interfaces/common/include/ModelInterface.h
index 7f08389e7..cdd9adb08 100644
--- a/interfaces/common/include/ModelInterface.h
+++ b/interfaces/common/include/ModelInterface.h
@@ -492,4 +492,102 @@ struct ModelOptimizerOptions
 
 } // namespace ELL_API
 
-#include "../tcc/ModelInterface.tcc"
+#pragma region implementation
+
+#ifndef SWIG
+
+#include <typeinfo>
+
+#include <nodes/include/SinkNode.h>
+#include <nodes/include/SourceNode.h>
+
+namespace ELL_API
+{
+
+//
+// Map
+//
+template <typename ElementType>
+void Map::SetSourceCallback(ell::api::CallbackBase<ElementType>& callback, size_t index)
+{
+    auto nodes = GetModel().GetModel().GetNodesByType<ell::nodes::SourceNode<ElementType>>();
+    if (nodes.size() == 0)
+    {
+        std::string name = typeid(ElementType).name();
+        throw std::invalid_argument("Cannot set SourceCallback because model has no SourceNode of type '" + name + "'");
+    }
+    nodes.at(index)->SetSourceFunction([&callback](auto& input) {
+        return callback.Run(input);
+    });
+}
+
+template <typename ElementType>
+void Map::SetSinkCallback(ell::api::CallbackBase<ElementType>& callback, size_t index)
+{
+    auto nodes = GetModel().GetModel().GetNodesByType<ell::nodes::SinkNode<ElementType>>();
+
+    if (nodes.size() == 0)
+    {
+        std::string name = typeid(ElementType).name();
+        throw std::invalid_argument("Cannot set SinkCallback because model has no SinkNode of type '" + name + "'");
+    }
+
+    nodes.at(index)->SetSinkFunction([&callback](const std::vector<ElementType>& output) {
+        // Reason for the const_cast:
+        // SWIG warns that the const overload gets shadowed, so CallbackBase only
+        // provides a non-const vector overload for Run.
+        callback.Run(const_cast<std::vector<ElementType>&>(output));
+    });
+}
+
+template <typename ElementType>
+void Map::Step(ell::api::TimeTickType timestamp)
+{
+    std::vector<ell::api::TimeTickType> input = { timestamp };
+    _map->Compute<ElementType>(input);
+}
+
+//
+// CompiledMap
+//
+template <typename ElementType>
+void CompiledMap::Step(ell::api::TimeTickType timestamp)
+{
+    // Note: casting TimeTickType to match input and output port types
+    std::vector<ell::api::TimeTickType> input = { timestamp };
+    _map->SetContext(this);
+    _map->Compute<ElementType>(input);
+}
+
+template <typename ElementType>
+void CompiledMap::RegisterCallbacks(
+    ell::api::CallbackBase<ElementType>& inputCallback,
+    ell::api::CallbackBase<ElementType>& outputCallback)
+{
+    ell::api::CallbackBase<ell::api::TimeTickType> unusedLagCallback;
+    GetCallbackForwarder<ElementType>().Register(inputCallback, _inputShape.Size(), outputCallback, _outputShape.Size(), unusedLagCallback);
+}
+
+template <typename ElementType>
+void CompiledMap::UnregisterCallbacks()
+{
+    GetCallbackForwarder<ElementType>().Clear();
+}
+
+template <typename ElementType>
+bool CompiledMap::InvokeSourceCallback(ElementType* input)
+{
+    return GetCallbackForwarder<ElementType>().InvokeInput(input);
+}
+
+template <typename ElementType>
+void CompiledMap::InvokeSinkCallback(ElementType* output)
+{
+    GetCallbackForwarder<ElementType>().InvokeOutput(output);
+}
+
+} // namespace ELL_API
+
+#endif // SWIG
+
+#pragma endregion implementation
diff --git a/interfaces/common/tcc/CallbackInterface.tcc b/interfaces/common/tcc/CallbackInterface.tcc
deleted file mode 100644
index 63dd80eb9..000000000
--- a/interfaces/common/tcc/CallbackInterface.tcc
+++ /dev/null
@@ -1,111 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     CallbackInterface.tcc (interfaces)
-//  Authors:  Lisa Ong
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifndef SWIG
-
-#include <algorithm>
-#include <stdexcept>
-
-#endif
-
-namespace ell
-{
-namespace api
-{
-    //////////////////////////////////////////////////////////////////////////
-    // Api classes for callback forwarding
-    //////////////////////////////////////////////////////////////////////////
-
-    template <typename InputType, typename OutputType>
-    CallbackForwarder<InputType, OutputType>::CallbackForwarder() :
-        _inputCallback(nullptr),
-        _outputCallback(nullptr),
-        _lagCallback(nullptr)
-    {
-    }
-
-    template <typename InputType, typename OutputType>
-    void CallbackForwarder<InputType, OutputType>::Register(CallbackBase<InputType>& inputCallback,
-                                                            size_t inputSize,
-                                                            CallbackBase<OutputType>& outputCallback,
-                                                            size_t outputSize,
-                                                            CallbackBase<TimeTickType>& lagCallback)
-    {
-        // Caller owns the lifetime of these objects
-        _inputCallback = &inputCallback;
-        _outputCallback = &outputCallback;
-        _lagCallback = &lagCallback;
-
-        _inputBuffer.resize(inputSize);
-        _outputBuffer.resize(outputSize);
-    }
-
-    template <typename InputType, typename OutputType>
-    void CallbackForwarder<InputType, OutputType>::Clear()
-    {
-        _inputCallback = nullptr;
-        _outputCallback = nullptr;
-        _lagCallback = nullptr;
-
-        _inputBuffer.resize(0);
-        _outputBuffer.resize(0);
-    }
-
-    template <typename InputType, typename OutputType>
-    bool CallbackForwarder<InputType, OutputType>::InvokeInput(InputType* buffer)
-    {
-        if (_inputCallback == nullptr)
-        {
-            throw std::invalid_argument("Register has not yet been called");
-        }
-
-        bool result = _inputCallback->Run(_inputBuffer);
-        if (result)
-        {
-            // EFFICIENCY: any way to avoid the copy?
-            std::copy(_inputBuffer.begin(), _inputBuffer.end(), buffer);
-        }
-        return result;
-    }
-
-    template <typename InputType, typename OutputType>
-    void CallbackForwarder<InputType, OutputType>::InvokeOutput(const OutputType* buffer)
-    {
-        if (_outputCallback == nullptr)
-        {
-            throw std::invalid_argument("Register has not yet been called");
-        }
-
-        // EFFICIENCY: any way to avoid the copy?
-        _outputBuffer.assign(buffer, buffer + _outputBuffer.size());
-        _outputCallback->Run(_outputBuffer);
-    }
-
-    template <typename InputType, typename OutputType>
-    void CallbackForwarder<InputType, OutputType>::InvokeOutput(OutputType value)
-    {
-        if (_outputCallback == nullptr)
-        {
-            throw std::invalid_argument("Register has not yet been called");
-        }
-
-        _outputCallback->Run(value);
-    }
-
-    template <typename InputType, typename OutputType>
-    void CallbackForwarder<InputType, OutputType>::InvokeLagNotification(TimeTickType value)
-    {
-        if (_lagCallback == nullptr)
-        {
-            throw std::invalid_argument("Register has not yet been called");
-        }
-
-        _lagCallback->Run(value);
-    }
-} // namespace api
-} // namespace ell
diff --git a/interfaces/common/tcc/ModelInterface.tcc b/interfaces/common/tcc/ModelInterface.tcc
deleted file mode 100644
index 7d4f2ca3e..000000000
--- a/interfaces/common/tcc/ModelInterface.tcc
+++ /dev/null
@@ -1,103 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     ModelInterface.tcc (interfaces)
-//  Authors:  Chuck Jacobs, Kirk Olynyk, Lisa Ong
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifndef SWIG
-
-#include <typeinfo>
-
-#include <nodes/include/SinkNode.h>
-#include <nodes/include/SourceNode.h>
-
-namespace ELL_API
-{
-
-//
-// Map
-//
-template <typename ElementType>
-void Map::SetSourceCallback(ell::api::CallbackBase<ElementType>& callback, size_t index)
-{
-    auto nodes = GetModel().GetModel().GetNodesByType<ell::nodes::SourceNode<ElementType>>();
-    if (nodes.size() == 0)
-    {
-        std::string name = typeid(ElementType).name();
-        throw std::invalid_argument("Cannot set SourceCallback because model has no SourceNode of type '" + name + "'");
-    }
-    nodes.at(index)->SetSourceFunction([&callback](auto& input) {
-        return callback.Run(input);
-    });
-}
-
-template <typename ElementType>
-void Map::SetSinkCallback(ell::api::CallbackBase<ElementType>& callback, size_t index)
-{
-    auto nodes = GetModel().GetModel().GetNodesByType<ell::nodes::SinkNode<ElementType>>();
-
-    if (nodes.size() == 0)
-    {
-        std::string name = typeid(ElementType).name();
-        throw std::invalid_argument("Cannot set SinkCallback because model has no SinkNode of type '" + name + "'");
-    }
-
-    nodes.at(index)->SetSinkFunction([&callback](const std::vector<ElementType>& output) {
-        // Reason for the const_cast:
-        // SWIG warns that the const overload gets shadowed, so CallbackBase only
-        // provides a non-const vector overload for Run.
-        callback.Run(const_cast<std::vector<ElementType>&>(output));
-    });
-}
-
-template <typename ElementType>
-void Map::Step(ell::api::TimeTickType timestamp)
-{
-    std::vector<ell::api::TimeTickType> input = { timestamp };
-    _map->Compute<ElementType>(input);
-}
-
-//
-// CompiledMap
-//
-template <typename ElementType>
-void CompiledMap::Step(ell::api::TimeTickType timestamp)
-{
-    // Note: casting TimeTickType to match input and output port types
-    std::vector<ell::api::TimeTickType> input = { timestamp };
-    _map->SetContext(this);
-    _map->Compute<ElementType>(input);
-}
-
-template <typename ElementType>
-void CompiledMap::RegisterCallbacks(
-    ell::api::CallbackBase<ElementType>& inputCallback,
-    ell::api::CallbackBase<ElementType>& outputCallback)
-{
-    ell::api::CallbackBase<ell::api::TimeTickType> unusedLagCallback;
-    GetCallbackForwarder<ElementType>().Register(inputCallback, _inputShape.Size(), outputCallback, _outputShape.Size(), unusedLagCallback);
-}
-
-template <typename ElementType>
-void CompiledMap::UnregisterCallbacks()
-{
-    GetCallbackForwarder<ElementType>().Clear();
-}
-
-template <typename ElementType>
-bool CompiledMap::InvokeSourceCallback(ElementType* input)
-{
-    return GetCallbackForwarder<ElementType>().InvokeInput(input);
-}
-
-template <typename ElementType>
-void CompiledMap::InvokeSinkCallback(ElementType* output)
-{
-    GetCallbackForwarder<ElementType>().InvokeOutput(output);
-}
-
-} // namespace ELL_API
-
-#endif // SWIG
\ No newline at end of file
diff --git a/interfaces/python/CMakeLists.txt b/interfaces/python/CMakeLists.txt
index c002875eb..738ae8c5c 100644
--- a/interfaces/python/CMakeLists.txt
+++ b/interfaces/python/CMakeLists.txt
@@ -53,7 +53,6 @@ if (TARGET _ELL_python)
         POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/CMake/OpenBLASSetup.cmake ${DEPLOYDIR}/OpenBLASSetup.cmake
         COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/interfaces/common/include/CallbackInterface.h ${DEPLOYDIR}/include/CallbackInterface.h
-        COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/interfaces/common/tcc/CallbackInterface.tcc ${DEPLOYDIR}/tcc/CallbackInterface.tcc
         COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/tools/wrap/templates/CMakeLists.python.txt.in ${DEPLOYDIR}/CMakeLists.python.txt.in
     )
diff --git a/interfaces/python/package/ell/rpi_magic.py b/interfaces/python/package/ell/rpi_magic.py
index 355db0da3..8c7b98942 100644
--- a/interfaces/python/package/ell/rpi_magic.py
+++ b/interfaces/python/package/ell/rpi_magic.py
@@ -1,5 +1,5 @@
 """
-A Jupyter magic to allow code to be copied and deployed on the Raspberry Pi directly
""" import tempfile @@ -12,6 +12,7 @@ from ipywidgets import Button, HBox, Label, Output, Layout from IPython.core.magic import Magics, magics_class, cell_magic + @magics_class class RaspberryPi(Magics): remote_pid = None @@ -54,9 +55,8 @@ def report_progress(sofar, total): local_path = rpi_path + '/' + os.path.basename(file) # I don't like this logic :( if (file.find('/include/') >= 0): - local_path = rpi_path + '/include/' + os.path.basename(file) - if (file.find('/tcc/') >= 0): - local_path = rpi_path + '/tcc/' + os.path.basename(file) + local_path = rpi_path + '/include/' + \ + os.path.basename(file) sftp.put(file, local_path, callback=report_progress) def copy_model_to_rpi(self, model, rpi_path): @@ -64,7 +64,6 @@ def copy_model_to_rpi(self, model, rpi_path): files = model.files(platform.PI3) + [ pkgdir + '/deploy/OpenBLASSetup.cmake', pkgdir + '/deploy/include/CallbackInterface.h', - pkgdir + '/deploy/tcc/CallbackInterface.tcc', pkgdir + '/util/tutorialHelpers.py', ] @@ -81,7 +80,7 @@ def copy_model_to_rpi(self, model, rpi_path): return self.remote_command('rm -r -f ' + rpi_path + '; mkdir -p ' + rpi_path + - '/include; mkdir -p ' + rpi_path + '/tcc') + '/include') self.remote_copy(files, rpi_path) self.feedback('Building...') self.remote_command('cd ' + rpi_path + @@ -96,7 +95,7 @@ def remote_eval(self, command): def print_output(self, line): line = line.strip('\n') - print(line) + print(line) sys.stdout.flush() def remote_command(self, command): @@ -126,7 +125,7 @@ def remote_command_async(self, command, callback): @cell_magic def rpi(self, line, cell): - + from IPython.core.display import display 'provide a user interface for remotely executing code on the RPi' @@ -163,25 +162,28 @@ def stop_process(b): self.status_label.value = 'Running' self.remote_command_async( 'cd ' + rpi_path + '; ' + - 'source /home/pi/miniconda3/envs/py34/bin/activate py34 > /dev/null 2>&1; ' + + 'source /home/pi/miniconda3/envs/py34/bin/activate py34 > /dev/null 2>&1; ' + 'echo running remote python script...; ' + - 'python3 actuation.py', - lambda:self.on_job_complete(stop_button)) + 'python3 actuation.py', + lambda: self.on_job_complete(stop_button)) except paramiko.AuthenticationException: self.feedback( 'Authentication failed. Wrong password? Evaluate the cell to try again.' ) self.password = None except TimeoutError: - self.feedback('Timeout while trying to reach the Raspberry Pi. Wrong IP address?') + self.feedback( + 'Timeout while trying to reach the Raspberry Pi. 
Wrong IP address?') except: errorType, value, traceback = sys.exc_info() - self.feedback("### Exception: " + str(errorType) + ": " + str(value)) + self.feedback("### Exception: " + + str(errorType) + ": " + str(value)) def on_job_complete(self, stop_button): stop_button.description = "Completed" stop_button.disabled = True + def init_magics(): try: ipy = get_ipython() diff --git a/interfaces/python/package/ell/util/pretrained_model.py b/interfaces/python/package/ell/util/pretrained_model.py index af98834b9..4cd129b53 100644 --- a/interfaces/python/package/ell/util/pretrained_model.py +++ b/interfaces/python/package/ell/util/pretrained_model.py @@ -35,7 +35,7 @@ def rename(self, new_name): def download(self, local_path, rename=None, cache=True): """Download the model from Github and unzip it""" import urllib.request - + self.local_path = local_path os.makedirs(local_path, exist_ok=True) local_file = os.path.join(local_path, self.name + '.ell') @@ -45,7 +45,7 @@ def download(self, local_path, rename=None, cache=True): urllib.request.urlretrieve( 'https://github.com/Microsoft/ELL-models/raw/master/models/ILSVRC2012/categories.txt', self.labels_path) - + if not cache or not os.path.exists(local_file): print('downloading model ' + self.model_name + ' ...', flush=True) zip_path, _ = urllib.request.urlretrieve( @@ -93,14 +93,14 @@ def compile(self, target): if not os.path.exists(outpath + '.bc'): raise Exception("compile failed to produce output file: " + - os.path.exists(outpath + '.bc')) + os.path.exists(outpath + '.bc')) if _buildtools.swig(outdir, self.name, 'python') is None: return None - out_file = _buildtools.opt(outdir, outpath + '.bc') + out_file = _buildtools.opt(outdir, outpath + '.bc') if out_file is None: return None - out_file = _buildtools.llc(outdir, out_file, target) + out_file = _buildtools.llc(outdir, out_file, target) if out_file is None: return None return self.create_cmake_file(target) @@ -121,8 +121,6 @@ def build(self, target='host'): shutil.copyfile(os.path.join(pkg_dir, 'OpenBLASSetup.cmake'), 'OpenBLASSetup.cmake') if not os.path.exists('include'): shutil.copytree(os.path.join(pkg_dir, 'include'), 'include') - if not os.path.exists('tcc'): - shutil.copytree(os.path.join(pkg_dir, 'tcc'), 'tcc') if _is_windows(target): _buildtools.run(['cmake', '-G', 'Visual Studio 14 2015 Win64', '-DPROCESSOR_HINT=haswell', '.'], shell=True) _buildtools.run(['cmake', '--build', '.', '--config', 'Release'], shell=True) @@ -136,7 +134,7 @@ def create_cmake_file(self, target): cmake_template = os.path.join(self.deploy_dir, 'CMakeLists.python.txt.in') with open(cmake_template) as f: template = f.read() - + template = template.replace("@ELL_outdir@", self.name) template = template.replace("@ELL_model@", self.name) template = template.replace("@ELL_model_name@", self.name) diff --git a/libraries/common/CMakeLists.txt b/libraries/common/CMakeLists.txt index af29a3e8b..252d3dcbe 100644 --- a/libraries/common/CMakeLists.txt +++ b/libraries/common/CMakeLists.txt @@ -43,19 +43,10 @@ set(include include/ProtoNNTrainerArguments.h ) -set(tcc - tcc/AppendNodeToModel.tcc - tcc/DataLoaders.tcc - tcc/LoadModel.tcc - tcc/MakeEvaluator.tcc - tcc/ParametersEnumerator.tcc -) - source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) -add_library(${library_name} ${src} ${include} ${tcc}) +add_library(${library_name} ${src} ${include}) target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR}) target_link_libraries(${library_name} data 
utilities functions model nodes predictors evaluators trainers) diff --git a/libraries/common/include/AppendNodeToModel.h b/libraries/common/include/AppendNodeToModel.h index 1960d5533..5718f46c4 100644 --- a/libraries/common/include/AppendNodeToModel.h +++ b/libraries/common/include/AppendNodeToModel.h @@ -28,4 +28,23 @@ namespace common } // namespace common } // namespace ell -#include "../tcc/AppendNodeToModel.tcc" +#pragma region implementation + +namespace ell +{ +namespace common +{ + template + model::Model AppendNodeToModel(const model::Map& map, const PredictorType& predictor) + { + model::TransformContext context; + model::ModelTransformer transformer; + auto model = transformer.CopyModel(map.GetModel(), context); + auto mapOutput = map.GetOutputElements(0); + model.AddNode(mapOutput, predictor); + return model; + } +} // namespace common +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/common/include/DataLoaders.h b/libraries/common/include/DataLoaders.h index e05e2869f..6ea0286ba 100644 --- a/libraries/common/include/DataLoaders.h +++ b/libraries/common/include/DataLoaders.h @@ -91,4 +91,124 @@ namespace common } // namespace common } // namespace ell -#include "../tcc/DataLoaders.tcc" +#pragma region implementation + +#include + +#include +#include + +// nodes +#include // for nodes::TimeTickType + +namespace ell +{ +namespace common +{ + template + auto GetExampleIterator(std::istream& stream) + { + TextLineIteratorType textLineIterator(stream); + + MetadataParserType metadataParser; + + DataVectorParserType dataVectorParser; + + return data::MakeSingleLineParsingExampleIterator(std::move(textLineIterator), std::move(metadataParser), std::move(dataVectorParser)); + } + + template + auto TransformDataset(data::Dataset& input, const MapType& map) + { + return input.template Transform([map](const ExampleType& example) { + auto transformedDataVector = map.template Compute(example.GetDataVector()); + return ExampleType(std::move(transformedDataVector), example.GetMetadata()); + }); + } + + namespace detail + { + // Context used by callback functions + struct CallbackContext + { + std::vector inputValues; + }; + } // namespace detail + + // C functions called by compiled maps + extern "C" { + inline bool InputCallback_Double(void* context, double* input) + { + auto dataContext = static_cast(context); + std::copy(dataContext->inputValues.begin(), dataContext->inputValues.end(), input); + return true; + } + + inline bool InputCallback_Float(void* context, float* input) + { + auto dataContext = static_cast(context); + std::transform(dataContext->inputValues.begin(), dataContext->inputValues.end(), input, [](double val) { return static_cast(val); }); + return true; + } + } + + namespace detail + { + // Sets up the function address that the LLVM jit will call for the source function callback + // Note that this only supports a single source node, but can be extended in the future + // to support multiple source nodes (e.g. by switching the function on node id). 
+ template + void ResolveInputCallback(const MapType& map, llvm::Module* module, ell::emitters::IRExecutionEngine& jitter) + { + const std::string defaultCallbackName("ELL_InputCallback"); + auto callback = module->getFunction(defaultCallbackName); + + ptrdiff_t callbackAddress = 0; + switch (map.GetInputType()) + { + case model::Port::PortType::smallReal: + { + callbackAddress = reinterpret_cast(&InputCallback_Float); + break; + } + case model::Port::PortType::real: + { + callbackAddress = reinterpret_cast(&InputCallback_Double); + break; + } + default: + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Unexpected source input type for model. Should be double or float."); + } + + jitter.DefineFunction(callback, callbackAddress); + } + } // namespace detail + + template + auto TransformDatasetWithCompiledMap(data::Dataset& input, const MapType& map, bool useBlas) + { + ell::model::MapCompilerOptions settings; + settings.compilerSettings.useBlas = useBlas; + + detail::CallbackContext dataContext; + model::IRMapCompiler compiler(settings); + + auto module = compiler.GetModule().GetLLVMModule(); + auto compiledMap = compiler.Compile(map); + compiledMap.SetContext(&dataContext); + + // Unlike reference maps, compiled maps receive the current time as the parameter input and + // values through the input callback. + detail::ResolveInputCallback(map, module, compiledMap.GetJitter()); + + return input.template Transform([&compiledMap, &dataContext](const ExampleType& example) { + dataContext.inputValues = example.GetDataVector().ToArray(); + compiledMap.SetInputValue(0, std::vector({ 0 /*currentTime*/ })); + auto transformedDataVector = compiledMap.template ComputeOutput(0); + return ExampleType(std::move(transformedDataVector), example.GetMetadata()); + }); + } +} // namespace common +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/common/include/LoadModel.h b/libraries/common/include/LoadModel.h index cf29b8fac..8e5f3d5b4 100644 --- a/libraries/common/include/LoadModel.h +++ b/libraries/common/include/LoadModel.h @@ -71,4 +71,37 @@ namespace common } // namespace common } // namespace ell -#include "../tcc/LoadModel.tcc" \ No newline at end of file +#pragma region implementation + +#include +#include +#include +#include + +namespace ell +{ +namespace common +{ + // STYLE internal use only from implementation, so not declared in main part of header file + template + model::Map LoadArchivedMap(std::istream& stream) + { + try + { + utilities::SerializationContext context; + RegisterNodeTypes(context); + RegisterMapTypes(context); + UnarchiverType unarchiver(stream, context); + model::Map map; + unarchiver.Unarchive(map); + return map; + } + catch (const ell::utilities::Exception& ex) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Error: couldn't read file: " + ex.GetMessage()); + } + } +} // namespace common +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/common/include/MakeEvaluator.h b/libraries/common/include/MakeEvaluator.h index 81f91de00..70e615ffa 100644 --- a/libraries/common/include/MakeEvaluator.h +++ b/libraries/common/include/MakeEvaluator.h @@ -45,4 +45,63 @@ namespace common } // namespace common } // namespace ell -#include "../tcc/MakeEvaluator.tcc" +#pragma region implementation + +// Loss functions +#include +#include +#include + +#include +#include +#include + +namespace ell +{ +namespace common +{ + template + std::shared_ptr> 
MakeEvaluator(const data::AnyDataset& anyDataset, const evaluators::EvaluatorParameters& evaluatorParameters, const LossFunctionArguments& lossFunctionArguments) + { + using LossFunctionEnum = common::LossFunctionArguments::LossFunction; + + switch (lossFunctionArguments.lossFunction) + { + case LossFunctionEnum::squared: + return evaluators::MakeEvaluator(anyDataset, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::SquaredLoss())); + + case LossFunctionEnum::log: + return evaluators::MakeEvaluator(anyDataset, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::LogLoss())); + + case LossFunctionEnum::hinge: + return evaluators::MakeEvaluator(anyDataset, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::HingeLoss())); + + default: + throw utilities::CommandLineParserErrorException("chosen loss function is not supported by this evaluator"); + } + } + + template + std::shared_ptr> MakeIncrementalEvaluator(data::AutoSupervisedExampleIterator exampleIterator, const evaluators::EvaluatorParameters& evaluatorParameters, const LossFunctionArguments& lossFunctionArguments) + { + using LossFunctionEnum = common::LossFunctionArguments::LossFunction; + + switch (lossFunctionArguments.lossFunction) + { + case LossFunctionEnum::squared: + return evaluators::MakeIncrementalEvaluator(exampleIterator, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::SquaredLoss())); + + case LossFunctionEnum::log: + return evaluators::MakeIncrementalEvaluator(exampleIterator, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::LogLoss())); + + case LossFunctionEnum::hinge: + return evaluators::MakeIncrementalEvaluator(exampleIterator, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::HingeLoss())); + + default: + throw utilities::CommandLineParserErrorException("chosen loss function is not supported by this evaluator"); + } + } +} // namespace common +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/common/include/ParametersEnumerator.h b/libraries/common/include/ParametersEnumerator.h index 65c7767c9..71545a165 100644 --- a/libraries/common/include/ParametersEnumerator.h +++ b/libraries/common/include/ParametersEnumerator.h @@ -95,4 +95,48 @@ namespace common } // namespace common } // namespace ell -#include "../tcc/ParametersEnumerator.tcc" +#pragma region implementation + +namespace ell +{ +namespace common +{ + template + ParametersType ParametersEnumerator::GenerateParameters(size_t index) const + { + ValueTupleType valueTuple; + SetValueTuple(valueTuple, index); + return GenerateParameters(valueTuple, std::make_index_sequence::value>()); + } + + template + std::vector ParametersEnumerator::GenerateParametersVector() const + { + std::vector vector; + auto size = Size(); + for (size_t index = 0; index < size; ++index) + { + vector.push_back(GenerateParameters(index)); + } + return vector; + } + + template + template + void ParametersEnumerator::SetValueTuple(ValueTupleType& valueTuple, size_t index) const + { + const auto& values = std::get(_valueVectorTuple); + std::get(valueTuple) = values[index % values.size()]; + 
diff --git a/libraries/common/include/ParametersEnumerator.h b/libraries/common/include/ParametersEnumerator.h
index 65c7767c9..71545a165 100644
--- a/libraries/common/include/ParametersEnumerator.h
+++ b/libraries/common/include/ParametersEnumerator.h
@@ -95,4 +95,48 @@ namespace common
 } // namespace common
 } // namespace ell
 
-#include "../tcc/ParametersEnumerator.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace common
+{
+    template <typename ParametersType, typename... ValueTypes>
+    ParametersType ParametersEnumerator<ParametersType, ValueTypes...>::GenerateParameters(size_t index) const
+    {
+        ValueTupleType valueTuple;
+        SetValueTuple(valueTuple, index);
+        return GenerateParameters(valueTuple, std::make_index_sequence<std::tuple_size<ValueTupleType>::value>());
+    }
+
+    template <typename ParametersType, typename... ValueTypes>
+    std::vector<ParametersType> ParametersEnumerator<ParametersType, ValueTypes...>::GenerateParametersVector() const
+    {
+        std::vector<ParametersType> vector;
+        auto size = Size();
+        for (size_t index = 0; index < size; ++index)
+        {
+            vector.push_back(GenerateParameters(index));
+        }
+        return vector;
+    }
+
+    template <typename ParametersType, typename... ValueTypes>
+    template <size_t Index>
+    void ParametersEnumerator<ParametersType, ValueTypes...>::SetValueTuple(ValueTupleType& valueTuple, size_t index) const
+    {
+        const auto& values = std::get<Index>(_valueVectorTuple);
+        std::get<Index>(valueTuple) = values[index % values.size()];
+        SetValueTuple<Index + 1>(valueTuple, index / values.size());
+    }
+
+    template <typename ParametersType, typename... ValueTypes>
+    template <size_t... Sequence>
+    ParametersType ParametersEnumerator<ParametersType, ValueTypes...>::GenerateParameters(const ValueTupleType& valueTuple, std::index_sequence<Sequence...>) const
+    {
+        return ParametersType{ std::get<Sequence>(valueTuple)... };
+    }
+} // namespace common
+} // namespace ell
+
+#pragma endregion implementation
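Note: GenerateParameters decodes a flat index mixed-radix style; each SetValueTuple level takes index % values.size() for its own choice and recurses on index / values.size(). A self-contained illustration of the same decoding, independent of the class above (value lists are hypothetical):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        // 3 learning rates x 2 regularization values = 6 parameter combinations.
        std::vector<double> rates{ 0.1, 0.01, 0.001 };
        std::vector<double> regs{ 0.0, 1.0 };
        for (size_t index = 0; index < rates.size() * regs.size(); ++index)
        {
            // Low "digit" first, then divide: exactly the SetValueTuple recursion.
            double rate = rates[index % rates.size()];
            double reg = regs[(index / rates.size()) % regs.size()];
            std::cout << index << ": rate=" << rate << " reg=" << reg << "\n";
        }
        return 0;
    }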
diff --git a/libraries/common/tcc/AppendNodeToModel.tcc b/libraries/common/tcc/AppendNodeToModel.tcc
deleted file mode 100644
index d355733c2..000000000
--- a/libraries/common/tcc/AppendNodeToModel.tcc
+++ /dev/null
@@ -1,24 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     AppendNodeToModel.tcc (common)
-//  Authors:  Chuck Jacobs
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace ell
-{
-namespace common
-{
-    template <typename PredictorNodeType, typename PredictorType>
-    model::Model AppendNodeToModel(const model::Map& map, const PredictorType& predictor)
-    {
-        model::TransformContext context;
-        model::ModelTransformer transformer;
-        auto model = transformer.CopyModel(map.GetModel(), context);
-        auto mapOutput = map.GetOutputElements<double>(0);
-        model.AddNode<PredictorNodeType>(mapOutput, predictor);
-        return model;
-    }
-} // namespace common
-} // namespace ell
diff --git a/libraries/common/tcc/DataLoaders.tcc b/libraries/common/tcc/DataLoaders.tcc
deleted file mode 100644
index 2e457c084..000000000
--- a/libraries/common/tcc/DataLoaders.tcc
+++ /dev/null
@@ -1,125 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     DataLoaders.tcc (common)
-//  Authors:  Ofer Dekel, Chuck Jacobs
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-
-#include
-#include
-
-// nodes
-#include // for nodes::TimeTickType
-
-namespace ell
-{
-namespace common
-{
-    template <typename TextLineIteratorType, typename MetadataParserType, typename DataVectorParserType>
-    auto GetExampleIterator(std::istream& stream)
-    {
-        TextLineIteratorType textLineIterator(stream);
-
-        MetadataParserType metadataParser;
-
-        DataVectorParserType dataVectorParser;
-
-        return data::MakeSingleLineParsingExampleIterator(std::move(textLineIterator), std::move(metadataParser), std::move(dataVectorParser));
-    }
-
-    template <typename ExampleType, typename MapType>
-    auto TransformDataset(data::Dataset<ExampleType>& input, const MapType& map)
-    {
-        return input.template Transform<ExampleType>([map](const ExampleType& example) {
-            auto transformedDataVector = map.template Compute<typename ExampleType::DataVectorType>(example.GetDataVector());
-            return ExampleType(std::move(transformedDataVector), example.GetMetadata());
-        });
-    }
-
-    namespace detail
-    {
-        // Context used by callback functions
-        struct CallbackContext
-        {
-            std::vector<double> inputValues;
-        };
-    } // namespace detail
-
-    // C functions called by compiled maps
-    extern "C" {
-    inline bool InputCallback_Double(void* context, double* input)
-    {
-        auto dataContext = static_cast<detail::CallbackContext*>(context);
-        std::copy(dataContext->inputValues.begin(), dataContext->inputValues.end(), input);
-        return true;
-    }
-
-    inline bool InputCallback_Float(void* context, float* input)
-    {
-        auto dataContext = static_cast<detail::CallbackContext*>(context);
-        std::transform(dataContext->inputValues.begin(), dataContext->inputValues.end(), input, [](double val) { return static_cast<float>(val); });
-        return true;
-    }
-    }
-
-    namespace detail
-    {
-        // Sets up the function address that the LLVM jit will call for the source function callback
-        // Note that this only supports a single source node, but can be extended in the future
-        // to support multiple source nodes (e.g. by switching the function on node id).
-        template <typename MapType>
-        void ResolveInputCallback(const MapType& map, llvm::Module* module, ell::emitters::IRExecutionEngine& jitter)
-        {
-            const std::string defaultCallbackName("ELL_InputCallback");
-            auto callback = module->getFunction(defaultCallbackName);
-
-            ptrdiff_t callbackAddress = 0;
-            switch (map.GetInputType())
-            {
-            case model::Port::PortType::smallReal:
-            {
-                callbackAddress = reinterpret_cast<ptrdiff_t>(&InputCallback_Float);
-                break;
-            }
-            case model::Port::PortType::real:
-            {
-                callbackAddress = reinterpret_cast<ptrdiff_t>(&InputCallback_Double);
-                break;
-            }
-            default:
-                throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Unexpected source input type for model. Should be double or float.");
-            }
-
-            jitter.DefineFunction(callback, callbackAddress);
-        }
-    } // namespace detail
-
-    template <typename ExampleType, typename MapType>
-    auto TransformDatasetWithCompiledMap(data::Dataset<ExampleType>& input, const MapType& map, bool useBlas)
-    {
-        ell::model::MapCompilerOptions settings;
-        settings.compilerSettings.useBlas = useBlas;
-
-        detail::CallbackContext dataContext;
-        model::IRMapCompiler compiler(settings);
-
-        auto module = compiler.GetModule().GetLLVMModule();
-        auto compiledMap = compiler.Compile(map);
-        compiledMap.SetContext(&dataContext);
-
-        // Unlike reference maps, compiled maps receive the current time as the parameter input and
-        // values through the input callback.
-        detail::ResolveInputCallback(map, module, compiledMap.GetJitter());
-
-        return input.template Transform<ExampleType>([&compiledMap, &dataContext](const ExampleType& example) {
-            dataContext.inputValues = example.GetDataVector().ToArray();
-            compiledMap.SetInputValue(0, std::vector<nodes::TimeTickType>({ 0 /*currentTime*/ }));
-            auto transformedDataVector = compiledMap.template ComputeOutput<typename ExampleType::DataVectorType>(0);
-            return ExampleType(std::move(transformedDataVector), example.GetMetadata());
-        });
-    }
-} // namespace common
-} // namespace ell
diff --git a/libraries/common/tcc/LoadModel.tcc b/libraries/common/tcc/LoadModel.tcc
deleted file mode 100644
index 0c82388c4..000000000
--- a/libraries/common/tcc/LoadModel.tcc
+++ /dev/null
@@ -1,38 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     LoadModel.tcc (common)
-//  Authors:  Lisa Ong
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-#include
-#include
-#include
-
-namespace ell
-{
-namespace common
-{
-    // STYLE internal use only from .tcc, so not declared inside header file
-    template <typename UnarchiverType>
-    model::Map LoadArchivedMap(std::istream& stream)
-    {
-        try
-        {
-            utilities::SerializationContext context;
-            RegisterNodeTypes(context);
-            RegisterMapTypes(context);
-            UnarchiverType unarchiver(stream, context);
-            model::Map map;
-            unarchiver.Unarchive(map);
-            return map;
-        }
-        catch (const ell::utilities::Exception& ex)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Error: couldn't read file: " + ex.GetMessage());
-        }
-    }
-} // namespace common
-} // namespace ell
\ No newline at end of file
diff --git a/libraries/common/tcc/MakeEvaluator.tcc b/libraries/common/tcc/MakeEvaluator.tcc
deleted file mode 100644
index 1f4690d6f..000000000
--- a/libraries/common/tcc/MakeEvaluator.tcc
+++ /dev/null
@@ -1,64 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//
File: MakeEvaluator.tcc (common) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Loss functions -#include -#include -#include - -#include -#include -#include - -namespace ell -{ -namespace common -{ - template - std::shared_ptr> MakeEvaluator(const data::AnyDataset& anyDataset, const evaluators::EvaluatorParameters& evaluatorParameters, const LossFunctionArguments& lossFunctionArguments) - { - using LossFunctionEnum = common::LossFunctionArguments::LossFunction; - - switch (lossFunctionArguments.lossFunction) - { - case LossFunctionEnum::squared: - return evaluators::MakeEvaluator(anyDataset, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::SquaredLoss())); - - case LossFunctionEnum::log: - return evaluators::MakeEvaluator(anyDataset, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::LogLoss())); - - case LossFunctionEnum::hinge: - return evaluators::MakeEvaluator(anyDataset, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::HingeLoss())); - - default: - throw utilities::CommandLineParserErrorException("chosen loss function is not supported by this evaluator"); - } - } - - template - std::shared_ptr> MakeIncrementalEvaluator(data::AutoSupervisedExampleIterator exampleIterator, const evaluators::EvaluatorParameters& evaluatorParameters, const LossFunctionArguments& lossFunctionArguments) - { - using LossFunctionEnum = common::LossFunctionArguments::LossFunction; - - switch (lossFunctionArguments.lossFunction) - { - case LossFunctionEnum::squared: - return evaluators::MakeIncrementalEvaluator(exampleIterator, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::SquaredLoss())); - - case LossFunctionEnum::log: - return evaluators::MakeIncrementalEvaluator(exampleIterator, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::LogLoss())); - - case LossFunctionEnum::hinge: - return evaluators::MakeIncrementalEvaluator(exampleIterator, evaluatorParameters, evaluators::BinaryErrorAggregator(), evaluators::AUCAggregator(), evaluators::MakeLossAggregator(functions::HingeLoss())); - - default: - throw utilities::CommandLineParserErrorException("chosen loss function is not supported by this evaluator"); - } - } -} // namespace common -} // namespace ell diff --git a/libraries/common/tcc/ParametersEnumerator.tcc b/libraries/common/tcc/ParametersEnumerator.tcc deleted file mode 100644 index 21a9022e9..000000000 --- a/libraries/common/tcc/ParametersEnumerator.tcc +++ /dev/null @@ -1,49 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ParameterGenerator.tcc (common) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace common -{ - template - ParametersType ParametersEnumerator::GenerateParameters(size_t index) const - { - ValueTupleType valueTuple; - SetValueTuple(valueTuple, index); - return GenerateParameters(valueTuple, std::make_index_sequence::value>()); - } - - template - std::vector 
ParametersEnumerator::GenerateParametersVector() const - { - std::vector vector; - auto size = Size(); - for (size_t index = 0; index < size; ++index) - { - vector.push_back(GenerateParameters(index)); - } - return vector; - } - - template - template - void ParametersEnumerator::SetValueTuple(ValueTupleType& valueTuple, size_t index) const - { - const auto& values = std::get(_valueVectorTuple); - std::get(valueTuple) = values[index % values.size()]; - SetValueTuple(valueTuple, index / values.size()); - } - - template - template - ParametersType ParametersEnumerator::GenerateParameters(const ValueTupleType& valueTuple, std::index_sequence) const - { - return ParametersType{ std::get(valueTuple)... }; - } -} // namespace common -} // namespace ell diff --git a/libraries/data/CMakeLists.txt b/libraries/data/CMakeLists.txt index ed66cd729..b619883bc 100644 --- a/libraries/data/CMakeLists.txt +++ b/libraries/data/CMakeLists.txt @@ -36,30 +36,14 @@ set (include include/AutoDataVector.h include/WeightLabel.h ) -set (tcc tcc/AutoDataVector.tcc - tcc/DataVector.tcc - tcc/DataVectorOperations.tcc - tcc/DenseDataVector.tcc - tcc/Example.tcc - tcc/ExampleIterator.tcc - tcc/Dataset.tcc - tcc/SingleLineParsingExampleIterator.tcc - tcc/SparseBinaryDataVector.tcc - tcc/SparseDataVector.tcc - tcc/StlIndexValueIterator.tcc - tcc/TextLine.tcc - tcc/TransformedDataVector.tcc - tcc/TransformingIndexValueIterator.tcc) - -set (doc doc/GeneralizedSparseFormat.md + set (doc doc/GeneralizedSparseFormat.md doc/README.md) source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) source_group("doc" FILES ${doc}) -add_library(${library_name} ${src} ${include} ${tcc} ${doc}) +add_library(${library_name} ${src} ${include} ${doc}) target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR}) target_link_libraries(${library_name} math utilities) diff --git a/libraries/data/include/AutoDataVector.h b/libraries/data/include/AutoDataVector.h index 0515c4643..2f190872e 100644 --- a/libraries/data/include/AutoDataVector.h +++ b/libraries/data/include/AutoDataVector.h @@ -208,4 +208,201 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/AutoDataVector.tcc" +#pragma region implementation + +namespace ell +{ +namespace data +{ + template + AutoDataVectorBase::AutoDataVectorBase(DefaultDataVectorType&& vector) + { + FindBestRepresentation(std::move(vector)); + } + + template + template Concept> + AutoDataVectorBase::AutoDataVectorBase(IndexValueIteratorType indexValueIterator) + { + DefaultDataVectorType defaultDataVector(std::move(indexValueIterator)); + FindBestRepresentation(std::move(defaultDataVector)); + } + + template + AutoDataVectorBase::AutoDataVectorBase(std::initializer_list list) + { + DefaultDataVectorType defaultDataVector(std::move(list)); + FindBestRepresentation(std::move(defaultDataVector)); + } + + template + AutoDataVectorBase::AutoDataVectorBase(std::initializer_list list) + { + DefaultDataVectorType defaultDataVector(std::move(list)); + FindBestRepresentation(std::move(defaultDataVector)); + } + + template + AutoDataVectorBase::AutoDataVectorBase(std::vector vec) + { + DefaultDataVectorType defaultDataVector(std::move(vec)); + FindBestRepresentation(std::move(defaultDataVector)); + } + + template + AutoDataVectorBase::AutoDataVectorBase(std::vector vec) + { + DefaultDataVectorType defaultDataVector(std::move(vec)); + FindBestRepresentation(std::move(defaultDataVector)); + } + + template + void 
AutoDataVectorBase::AppendElement(size_t /*index*/, double /*value*/) + { + throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Append element not supported for AutoDataVector"); + } + + template + double AutoDataVectorBase::Dot(math::UnorientedConstVectorBase vector) const + { + return _pInternal->Dot(vector); + } + + template + float AutoDataVectorBase::Dot(math::UnorientedConstVectorBase vector) const + { + return _pInternal->Dot(vector); + } + + template + void AutoDataVectorBase::AddTo(math::RowVectorReference vector) const + { + _pInternal->AddTo(vector); + } + + template + std::vector AutoDataVectorBase::ToArray(size_t size) const + { + return _pInternal->ToArray(size); + } + + template + void AutoDataVectorBase::Print(std::ostream& os) const + { + _pInternal->Print(os); + } + + template + template + void AutoDataVectorBase::AddTransformedTo(math::RowVectorReference vector, TransformationType transformation) const + { + _pInternal->AddTransformedTo(vector, transformation); + } + + template + template + ReturnType AutoDataVectorBase::CopyAs(ArgTypes... args) const + { + return _pInternal->CopyAs(args...); + } + + template + template + ReturnType AutoDataVectorBase::TransformAs(ArgTypes... args) const + { + return _pInternal->TransformAs(args...); + } + + template + bool DoesCastModifyValue(double value) + { + double target = static_cast(static_cast(value)); + return (target - value > APPROXIMATION_TOLERANCE) || (value - target > APPROXIMATION_TOLERANCE); + } + + template + void AutoDataVectorBase::FindBestRepresentation(DefaultDataVectorType defaultDataVector) + { + size_t numNonZeros = 0; + bool includesNonFloats = false; + bool includesNonShorts = false; + bool includesNonBytes = false; + bool includesNonBinary = false; + + auto iter = GetIterator(defaultDataVector); + while (iter.IsValid()) + { + double value = iter.Get().value; + + ++numNonZeros; + includesNonFloats |= DoesCastModifyValue(value); + includesNonShorts |= DoesCastModifyValue(value); + includesNonBytes |= DoesCastModifyValue(value); + includesNonBinary |= (value != 1 && value != 0); + + iter.Next(); + } + + // dense + if (numNonZeros > SPARSE_THRESHOLD * defaultDataVector.PrefixLength()) + { + if (includesNonFloats) + { + SetInternal(std::move(defaultDataVector)); + } + else if (includesNonShorts) + { + SetInternal(std::move(defaultDataVector)); + } + else if (includesNonBytes) + { + SetInternal(std::move(defaultDataVector)); + } + else + { + SetInternal(std::move(defaultDataVector)); + } + } + + // sparse + else + { + if (includesNonFloats) + { + SetInternal(std::move(defaultDataVector)); + } + else if (includesNonShorts) + { + SetInternal(std::move(defaultDataVector)); + } + else if (includesNonBytes) + { + SetInternal(std::move(defaultDataVector)); + } + else if (includesNonBinary) + { + SetInternal(std::move(defaultDataVector)); + } + else + { + SetInternal(std::move(defaultDataVector)); + } + } + } + + template + template Concept> + void AutoDataVectorBase::SetInternal(DefaultDataVectorType defaultDataVector) + { + _pInternal = std::make_unique(GetIterator(defaultDataVector)); + } + + template + AutoDataVector AutoDataVectorParser::Parse(TextLine& textLine) + { + return AutoDataVector(IndexValueParsingIterator(textLine)); + } +} // namespace data +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/data/include/DataVector.h b/libraries/data/include/DataVector.h index e1039de9b..1ad290e32 100644 --- a/libraries/data/include/DataVector.h +++ 
b/libraries/data/include/DataVector.h @@ -433,4 +433,366 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/DataVector.tcc" \ No newline at end of file +#pragma region implementation + +#include "../include/DenseDataVector.h" +#include "../include/SparseBinaryDataVector.h" +#include "../include/SparseDataVector.h" +#include "../include/TransformingIndexValueIterator.h" + +namespace ell +{ +namespace data +{ + template + ReturnType IDataVector::InvokeWithThis(GenericLambdaType lambda) const + { + auto type = GetType(); + switch (type) + { + case Type::DoubleDataVector: + return lambda(static_cast(this)); + + case Type::FloatDataVector: + return lambda(static_cast(this)); + + case Type::ShortDataVector: + return lambda(static_cast(this)); + + case Type::ByteDataVector: + return lambda(static_cast(this)); + + case Type::SparseDoubleDataVector: + return lambda(static_cast(this)); + + case Type::SparseFloatDataVector: + return lambda(static_cast(this)); + + case Type::SparseShortDataVector: + return lambda(static_cast(this)); + + case Type::SparseByteDataVector: + return lambda(static_cast(this)); + + case Type::SparseBinaryDataVector: + return lambda(static_cast(this)); + + default: + throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "attempted to cast unsupported data vector type"); + } + } + + template + void IDataVector::AddTransformedTo(math::RowVectorReference vector, TransformationType transformation) const + { + InvokeWithThis([vector, transformation](const auto* pThis) { + pThis->template AddTransformedTo(vector, transformation); + }); + } + + template + ReturnType IDataVector::CopyAs() const + { + return InvokeWithThis([](const auto* pThis) { + return ReturnType(pThis->template GetIterator()); + }); + } + + template + ReturnType IDataVector::TransformAs(TransformationType transformation, size_t size) const + { + return InvokeWithThis([transformation, size](const auto* pThis) { + return ReturnType(MakeTransformingIndexValueIterator(pThis->template GetIterator(size), transformation)); + }); + } + + template + ReturnType IDataVector::TransformAs(TransformationType transformation) const + { + return InvokeWithThis([transformation](const auto* pThis) { + return ReturnType(MakeTransformingIndexValueIterator(pThis->template GetIterator(), transformation)); + }); + } + + template + template Concept> + void DataVectorBase::AppendElements(IndexValueIteratorType indexValueIterator) + { + while (indexValueIterator.IsValid()) + { + auto current = indexValueIterator.Get(); + static_cast(this)->AppendElement(current.index, current.value); + indexValueIterator.Next(); + } + } + + template + void DataVectorBase::AppendElements(std::initializer_list list) + { + for (const auto& current : list) + { + static_cast(this)->AppendElement(current.index, current.value); + } + } + + template + void DataVectorBase::AppendElements(std::initializer_list list) + { + size_t index = 0; + for (double current : list) + { + static_cast(this)->AppendElement(index++, current); + } + } + + template + void DataVectorBase::AppendElements(std::vector vec) + { + for (const auto& current : vec) + { + static_cast(this)->AppendElement(current.index, current.value); + } + } + + template + void DataVectorBase::AppendElements(const std::vector& vec) + { + size_t index = 0; + for (double current : vec) + { + static_cast(this)->AppendElement(index++, current); + } + } + + template + void DataVectorBase::AppendElements(const std::vector& vec) + { + size_t index = 0; + for (float 
current : vec) + { + static_cast(this)->AppendElement(index++, current); + } + } + + template + double DataVectorBase::Norm2Squared() const + { + auto iter = GetIterator(*static_cast(this)); + + double result = 0.0; + while (iter.IsValid()) + { + double value = iter.Get().value; + result += value * value; + iter.Next(); + } + return result; + } + + template + double DataVectorBase::Dot(math::UnorientedConstVectorBase vector) const + { + auto indexValueIterator = GetIterator(*static_cast(this)); + + double result = 0.0; + auto size = vector.Size(); + while (indexValueIterator.IsValid()) + { + auto indexValue = indexValueIterator.Get(); + if (indexValue.index >= size) + { + break; + } + result += indexValue.value * vector[indexValue.index]; + indexValueIterator.Next(); + } + return result; + } + + template + float DataVectorBase::Dot(math::UnorientedConstVectorBase vector) const + { + auto indexValueIterator = GetIterator(*static_cast(this)); + + float result = 0.0; + auto size = vector.Size(); + while (indexValueIterator.IsValid()) + { + auto indexValue = indexValueIterator.Get(); + if (indexValue.index >= size) + { + break; + } + result += static_cast(indexValue.value) * vector[indexValue.index]; + indexValueIterator.Next(); + } + return result; + } + + template + void DataVectorBase::AddTo(math::RowVectorReference vector) const + { + auto indexValueIterator = GetIterator(*static_cast(this)); + + auto size = vector.Size(); + while (indexValueIterator.IsValid()) + { + auto indexValue = indexValueIterator.Get(); + if (indexValue.index >= size) + { + return; + } + vector[indexValue.index] += indexValue.value; + indexValueIterator.Next(); + } + } + + template + std::vector DataVectorBase::ToArray(size_t size) const + { + std::vector result(size); + auto indexValueIterator = GetIterator(*static_cast(this)); + + while (indexValueIterator.IsValid()) + { + auto indexValue = indexValueIterator.Get(); + if (indexValue.index >= size) + { + break; + } + result[indexValue.index] = indexValue.value; + indexValueIterator.Next(); + } + + return result; + } + + template + template + void DataVectorBase::AddTransformedTo(math::RowVectorReference vector, TransformationType transformation) const + { + auto size = vector.Size(); + auto indexValueIterator = GetIterator(*static_cast(this), size); + + while (indexValueIterator.IsValid()) + { + auto indexValue = indexValueIterator.Get(); + if (indexValue.index >= size) + { + return; + } + double result = transformation(indexValue); + vector[indexValue.index] += result; + indexValueIterator.Next(); + } + } + + template + template + ReturnType DataVectorBase::CopyAs() const + { + return ReturnType(GetIterator(*static_cast(this))); + } + + template + template + ReturnType DataVectorBase::TransformAs(TransformationType transformation, size_t size) const + { + return ReturnType(MakeTransformingIndexValueIterator(GetIterator(*static_cast(this), size), std::move(transformation))); + } + + template + template + ReturnType DataVectorBase::TransformAs(TransformationType transformation) const + { + return ReturnType(MakeTransformingIndexValueIterator(GetIterator(*static_cast(this)), std::move(transformation))); + } + + template + void DataVectorBase::Print(std::ostream& os) const + { + auto indexValueIterator = GetIterator(*static_cast(this)); + if (indexValueIterator.IsValid()) + { + auto indexValue = indexValueIterator.Get(); + os << indexValue.index << ":" << indexValue.value; + indexValueIterator.Next(); + } + + while (indexValueIterator.IsValid()) + { + auto indexValue 
= indexValueIterator.Get(); + os << '\t' << indexValue.index << ":" << indexValue.value; + indexValueIterator.Next(); + } + } + + template + static void AddTransformedTo(const DataVectorType& dataVector, math::RowVectorReference vector, TransformationType transformation) + { + return dataVector.template AddTransformedTo(vector, transformation); + } + + template + static auto GetIterator(DataVectorType& vector) + { + return vector.template GetIterator(); + } + + template + static auto GetIterator(const DataVectorType& vector) + { + return vector.template GetIterator(); + } + + template + static auto GetIterator(DataVectorType& vector, size_t size) + { + return vector.template GetIterator(size); + } + + template + static auto GetIterator(const DataVectorType& vector, size_t size) + { + return vector.template GetIterator(size); + } + + template + static ReturnType CopyAs(DataVectorType& vector) + { + return vector.template CopyAs(); + } + + template + static ReturnType CopyAs(const DataVectorType& vector) + { + return vector.template CopyAs(); + } + + template + static ReturnType TransformAs(DataVectorType& vector, TransformationType transformation, size_t size) + { + return vector.template TransformAs(transformation, size); + } + + template + static ReturnType TransformAs(const DataVectorType& vector, TransformationType transformation, size_t size) + { + return vector.template TransformAs(transformation, size); + } + + template + static ReturnType TransformAs(DataVectorType& vector, TransformationType transformation) + { + return vector.template TransformAs(transformation); + } + + template + static ReturnType TransformAs(const DataVectorType& vector, TransformationType transformation) + { + return vector.template TransformAs(transformation); + } +} // namespace data +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/data/include/DataVectorOperations.h b/libraries/data/include/DataVectorOperations.h index 243c563ad..74bccf07e 100644 --- a/libraries/data/include/DataVectorOperations.h +++ b/libraries/data/include/DataVectorOperations.h @@ -89,4 +89,54 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/DataVectorOperations.tcc" +#pragma region implementation + +namespace ell +{ +namespace data +{ + template Concept> + auto operator*(double scalar, const DataVectorType& vector) + { + return MakeTransformedDataVector(vector, [scalar](IndexValue x) { return scalar * x.value; }); + } + + template Concept> + auto operator*(const DataVectorType& vector, double scalar) + { + return scalar * vector; + } + + template + ElementType operator*(math::UnorientedConstVectorBase vector, const IDataVector& dataVector) + { + return dataVector.Dot(vector); + } + + template + auto Square(const DataVectorType& vector) + { + return MakeTransformedDataVector(vector, [](IndexValue x) { return x.value * x.value; }); + } + + template + auto Sqrt(const DataVectorType& vector) + { + return MakeTransformedDataVector(vector, [](IndexValue x) { return std::sqrt(x.value); }); + } + + template + auto Abs(const DataVectorType& vector) + { + return MakeTransformedDataVector(vector, [](IndexValue x) { return std::abs(x.value); }); + } + + template + auto ZeroIndicator(const DataVectorType& vector) + { + return MakeTransformedDataVector(vector, [](IndexValue x) { return x.value == 0.0 ? 
1.0 : 0.0; }); + } +} // namespace data +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/data/include/Dataset.h b/libraries/data/include/Dataset.h index 4b2c44239..ac7a1655b 100644 --- a/libraries/data/include/Dataset.h +++ b/libraries/data/include/Dataset.h @@ -290,4 +290,236 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/Dataset.tcc" +#pragma region implementation + +#include +#include + +#include +#include +#include + +namespace ell +{ +namespace data +{ + using namespace logging; + + template + ExampleIterator AnyDataset::GetExampleIterator() const + { + auto fromIndex = _fromIndex; + auto size = _size; + auto getExampleIterator = [fromIndex, size](const auto* pDataset) { return pDataset->template GetExampleIterator(fromIndex, size); }; + + // all Dataset types for which GetAnyDataset() is called must be listed below, in the variadic template argument. + using Invoker = utilities::AbstractInvoker, + Dataset>; + + return Invoker::Invoke>(getExampleIterator, _pDataset); + } + + template + template + Dataset::DatasetExampleIterator::DatasetExampleIterator(InternalIteratorType begin, InternalIteratorType end) : + _current(begin), + _end(end) + { + } + + template + Dataset::Dataset(ExampleIterator exampleIterator) + { + while (exampleIterator.IsValid()) + { + AddExample(exampleIterator.Get()); + exampleIterator.Next(); + } + } + + template + Dataset::Dataset(const AnyDataset& anyDataset) : + Dataset(anyDataset.GetExampleIterator()) + { + } + + template + void Dataset::Swap(Dataset& other) + { + std::swap(_examples, other._examples); + std::swap(_numFeatures, other._numFeatures); + } + + template + DatasetExampleType& Dataset::GetExample(size_t index) + { + return _examples[index]; + } + + template + const DatasetExampleType& Dataset::GetExample(size_t index) const + { + return _examples[index]; + } + + template + DatasetExampleType& Dataset::operator[](size_t index) + { + return _examples[index]; + } + + template + const DatasetExampleType& Dataset::operator[](size_t index) const + { + return _examples[index]; + } + + template + template + ExampleIterator Dataset::GetExampleIterator(size_t fromIndex, size_t size) const + { + size = CorrectRangeSize(fromIndex, size); + return ExampleIterator(std::make_unique>(_examples.cbegin() + fromIndex, _examples.cbegin() + fromIndex + size)); + } + + template + auto Dataset::GetExampleReferenceIterator(size_t fromIndex, size_t size) const -> ExampleReferenceIterator + { + size = CorrectRangeSize(fromIndex, size); + return ExampleReferenceIterator(_examples.cbegin() + fromIndex, _examples.cbegin() + fromIndex + size); + } + + template + void Dataset::AddExample(DatasetExampleType example) + { + size_t numFeatures = example.GetDataVector().PrefixLength(); + _examples.push_back(std::move(example)); + + if (_numFeatures < numFeatures) + { + _numFeatures = numFeatures; + } + } + + template + template + Dataset Dataset::Transform(std::function transformationFunction) + { + Dataset dataset; + for (auto& example : _examples) + { + dataset.AddExample(transformationFunction(example)); + } + return dataset; + } + + template + void Dataset::Reset() + { + _examples.clear(); + _numFeatures = 0; + } + + template + void Dataset::RandomPermute(std::default_random_engine& rng, size_t prefixSize) + { + prefixSize = CorrectRangeSize(0, prefixSize); + for (size_t i = 0; i < prefixSize; ++i) + { + RandomSwap(rng, i, i, _examples.size() - i); + } + } + + template + void 
Dataset::RandomPermute(std::default_random_engine& rng, size_t rangeFirstIndex, size_t rangeSize, size_t prefixSize) + { + rangeSize = CorrectRangeSize(rangeFirstIndex, rangeSize); + + if (prefixSize > rangeSize || prefixSize == 0) + { + prefixSize = rangeSize; + } + + for (size_t s = 0; s < prefixSize; ++s) + { + size_t index = rangeFirstIndex + s; + RandomSwap(rng, index, index, rangeSize - s); + } + } + + template + void Dataset::RandomSwap(std::default_random_engine& rng, size_t targetExampleIndex, size_t rangeFirstIndex, size_t rangeSize) + { + using std::swap; + rangeSize = CorrectRangeSize(rangeFirstIndex, rangeSize); + if (targetExampleIndex > _examples.size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange); + } + + std::uniform_int_distribution dist(rangeFirstIndex, rangeFirstIndex + rangeSize - 1); + size_t j = dist(rng); + swap(_examples[targetExampleIndex], _examples[j]); + } + + template + template + void Dataset::Sort(SortKeyType sortKey, size_t fromIndex, size_t size) + { + size = CorrectRangeSize(fromIndex, size); + + std::sort(_examples.begin() + fromIndex, + _examples.begin() + fromIndex + size, + [&](const DatasetExampleType& a, const DatasetExampleType& b) -> bool { + return sortKey(a) < sortKey(b); + }); + } + + template + template + void Dataset::Partition(PartitionKeyType partitionKey, size_t fromIndex, size_t size) + { + size = CorrectRangeSize(fromIndex, size); + std::partition(_examples.begin() + fromIndex, _examples.begin() + fromIndex + size, partitionKey); + } + + template + void Dataset::Print(std::ostream& os, size_t tabs, size_t fromIndex, size_t size) const + { + size = CorrectRangeSize(fromIndex, size); + + for (size_t index = fromIndex; index < fromIndex + size; ++index) + { + os << std::string(tabs * 4, ' '); + _examples[index].Print(os); + os << EOL; + } + } + + template + std::ostream& operator<<(std::ostream& os, const Dataset& dataset) + { + dataset.Print(os); + return os; + } + + template + size_t Dataset::CorrectRangeSize(size_t fromIndex, size_t size) const + { + if (size == 0 || fromIndex + size > _examples.size()) + { + return _examples.size() - fromIndex; + } + return size; + } + + template + Dataset MakeDataset(ExampleIterator exampleIterator) + { + return Dataset(std::move(exampleIterator)); + } +} // namespace data +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/data/include/DenseDataVector.h b/libraries/data/include/DenseDataVector.h index 6ab8cad9c..4bc179235 100644 --- a/libraries/data/include/DenseDataVector.h +++ b/libraries/data/include/DenseDataVector.h @@ -146,6 +146,114 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/DenseDataVector.tcc" +#pragma region implementation + +#include +#include +#include + +#include + +namespace ell +{ +namespace data +{ + template + DenseDataVector::DenseDataVector() : + _numNonzeros(0) + { + _data.reserve(DEFAULT_DENSE_VECTOR_CAPACITY); + } + + template + template Concept> + DenseDataVector::DenseDataVector(IndexValueIteratorType indexValueIterator) + { + AppendElements(std::move(indexValueIterator)); + } + + template + DenseDataVector::DenseDataVector(std::initializer_list list) + { + AppendElements(std::move(list)); + } + + template + DenseDataVector::DenseDataVector(std::initializer_list list) + { + AppendElements(std::move(list)); + } + + template + DenseDataVector::DenseDataVector(std::vector list) + { + AppendElements(std::move(list)); + } + + template + DenseDataVector::DenseDataVector(std::vector 
list) + { + AppendElements(std::move(list)); + } + + template + DenseDataVector::DenseDataVector(std::vector list) + { + AppendElements(std::move(list)); + } + + template + double DenseDataVector::operator[](size_t index) const + { + if (index >= _data.size()) + { + return 0.0; + } + return static_cast(_data[index]); + } + + template + template + VectorIndexValueIterator DenseDataVector::GetIterator(size_t size) const + { + return MakeVectorIndexValueIterator(_data, size); + } + + template // move this to datavectorbase? + template + VectorIndexValueIterator DenseDataVector::GetIterator() const + { + return GetIterator(PrefixLength()); + } + + template + void DenseDataVector::AppendElement(size_t index, double value) + { + if (value == 0) + { + return; + } + + ElementType storedValue = static_cast(value); + + if (storedValue - value > 1.0e-5 || value - storedValue > 1.0e-5) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + utilities::FormatString("Data loss detected when storing value %f as type %s", value, utilities::GetTypeName().c_str())); + } + + if (index < _data.size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Can only append values to the end of a data vector"); + } + + _data.resize(index + 1); + _data[index] = storedValue; + ++_numNonzeros; + } +} // namespace data +} // namespace ell + +#pragma endregion implementation #endif // DENSEDATAVECTOR_H diff --git a/libraries/data/include/Example.h b/libraries/data/include/Example.h index 59a2f87b4..89fcd3b3f 100644 --- a/libraries/data/include/Example.h +++ b/libraries/data/include/Example.h @@ -152,4 +152,59 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/Example.tcc" +#pragma region implementation + +namespace ell +{ +namespace data +{ + template + Example::Example(DataVectorType dataVector, MetadataType metadata) : + _dataVector(std::make_shared(std::move(dataVector))), + _metadata(std::move(metadata)) + { + } + + template + Example::Example(const std::shared_ptr& dataVector, const MetadataType& metadata) : + _dataVector(dataVector), + _metadata(metadata) + { + } + + template + template Concept> + TargetExampleType Example::CopyAs() const + { + // shallow copy of data vector + return TargetExampleType(_dataVector, typename TargetExampleType::MetadataType(_metadata)); + } + + template + template > + TargetExampleType Example::CopyAs() const + { + // deep copy of data vector + using DataType = typename TargetExampleType::DataVectorType; + using TargetMetadataType = typename TargetExampleType::MetadataType; + return TargetExampleType(std::make_shared(_dataVector->template CopyAs()), TargetMetadataType(_metadata)); + } + + template + void Example::Print(std::ostream& os) const + { + os << _metadata; + os << "\t"; + _dataVector->Print(os); + } + + template + std::ostream& operator<<(std::ostream& ostream, const Example& example) + { + example.Print(ostream); + return ostream; + } +} // namespace data +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/data/include/ExampleIterator.h b/libraries/data/include/ExampleIterator.h index d6dca4ef4..7df721ed9 100644 --- a/libraries/data/include/ExampleIterator.h +++ b/libraries/data/include/ExampleIterator.h @@ -64,4 +64,18 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/ExampleIterator.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace data +{ + template + 
ExampleIterator::ExampleIterator(std::unique_ptr>&& iterator) : + _iterator(std::move(iterator)) + { + } +} // namespace data +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/data/include/SingleLineParsingExampleIterator.h b/libraries/data/include/SingleLineParsingExampleIterator.h index 099833bf3..fbfcaa3f4 100644 --- a/libraries/data/include/SingleLineParsingExampleIterator.h +++ b/libraries/data/include/SingleLineParsingExampleIterator.h @@ -81,4 +81,69 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/SingleLineParsingExampleIterator.tcc" +#pragma region implementation + +namespace ell +{ +namespace data +{ + template + SingleLineParsingExampleIterator::SingleLineParsingExampleIterator(TextLineIteratorType textLineIterator, MetadataParserType metadataParser, DataVectorParserType dataVectorParser) : + _textLineIterator(std::move(textLineIterator)), + _metadataParser(std::move(metadataParser)), + _dataVectorParser(std::move(dataVectorParser)) + { + if (_textLineIterator.IsValid()) + { + ReadExample(); + } + } + + template + void SingleLineParsingExampleIterator::Next() + { + _textLineIterator.Next(); + ReadExample(); + } + + template + void SingleLineParsingExampleIterator::ReadExample() + { + // get a line - skip lines that contain just whitespace or just a comment + TextLine line = _textLineIterator.GetTextLine(); + line.TrimLeadingWhitespace(); + + while (line.IsEndOfContent()) + { + _textLineIterator.Next(); + if (!_textLineIterator.IsValid()) + { + return; + } + + line = _textLineIterator.GetTextLine(); + line.TrimLeadingWhitespace(); + } + + // parse metadata + auto metaData = _metadataParser.Parse(line); + + // parse datavector + auto dataVector = _dataVectorParser.Parse(line); + + // cache the parsed example + _currentExample = ExampleType(std::move(dataVector), std::move(metaData)); + } + + template + auto MakeSingleLineParsingExampleIterator(TextLineIteratorType textLineIterator, MetadataParserType metadataParser, DataVectorParserType dataVectorParser) + { + using ExampleType = ParserExample; + using IteratorType = SingleLineParsingExampleIterator; + auto iterator = std::make_unique(std::move(textLineIterator), std::move(metadataParser), std::move(dataVectorParser)); + return ExampleIterator(std::move(iterator)); + } +} // namespace data +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/data/include/SparseBinaryDataVector.h b/libraries/data/include/SparseBinaryDataVector.h index 3351d5f6c..412997667 100644 --- a/libraries/data/include/SparseBinaryDataVector.h +++ b/libraries/data/include/SparseBinaryDataVector.h @@ -218,6 +218,156 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/SparseBinaryDataVector.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace data +{ + template + SparseBinaryDataVectorIterator::SparseBinaryDataVectorIterator(const IndexIteratorType& listIterator, size_t size) : + _indexIterator(listIterator), + _size(size) + { + } + + template + void SparseBinaryDataVectorIterator::Next() + { + if (_index == _iteratorIndex) + { + _indexIterator.Next(); + _iteratorIndex = _indexIterator.IsValid() ? _indexIterator.Get() : _size; + } + ++_index; + } + + template + IndexValue SparseBinaryDataVectorIterator::Get() const + { + return _index == _iteratorIndex ? 
IndexValue{ _index, 1.0 } : IndexValue{ _index, 0.0 }; + } + + template + SparseBinaryDataVectorIterator::SparseBinaryDataVectorIterator(const IndexIteratorType& listIterator, size_t size) : + _indexIterator(listIterator), + _size(size) + { + _iteratorIndex = _indexIterator.IsValid() ? _indexIterator.Get() : _size; + } + + template + template Concept> + SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(IndexValueIteratorType indexValueIterator) + { + AppendElements(std::move(indexValueIterator)); + } + + template + template + auto SparseBinaryDataVectorBase::GetIterator(size_t size) const -> Iterator + { + return Iterator(_indexList.GetIterator(), size); + } + + template + template + auto SparseBinaryDataVectorBase::GetIterator() const -> Iterator + { + return GetIterator(PrefixLength()); + } + + template + SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(std::initializer_list list) + { + AppendElements(std::move(list)); + } + + template + SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(std::initializer_list list) + { + AppendElements(std::move(list)); + } + + template + SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(std::vector vec) + { + AppendElements(std::move(vec)); + } + + template + SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(std::vector vec) + { + AppendElements(std::move(vec)); + } + + template + void SparseBinaryDataVectorBase::AppendElement(size_t index, double value) + { + if (value == 0) + { + return; + } + + if (value != 1) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Input to BinaryDataVector needs to be 0 or 1"); + } + + _indexList.Append(index); + } + + template + size_t SparseBinaryDataVectorBase::PrefixLength() const + { + if (_indexList.Size() == 0) + { + return 0; + } + else + { + return _indexList.Max() + 1; + } + } + + template + double SparseBinaryDataVectorBase::Dot(math::UnorientedConstVectorBase vector) const + { + double value = 0.0; + + auto iter = _indexList.GetIterator(); + while (iter.IsValid()) + { + value += vector[iter.Get()]; + iter.Next(); + } + + return value; + } + + template + void SparseBinaryDataVectorBase::AddTo(math::RowVectorReference vector) const + { + auto iter = _indexList.GetIterator(); + auto size = vector.Size(); + + while (iter.IsValid()) + { + auto index = iter.Get(); + if (index >= size) + { + return; + } + + vector[index] += 1.0; + iter.Next(); + } + } +} // namespace data +} // namespace ell + +#pragma endregion implementation #endif // SPARSEBINARYDATAVECTOR_H diff --git a/libraries/data/include/SparseDataVector.h b/libraries/data/include/SparseDataVector.h index 180d53f67..259758334 100644 --- a/libraries/data/include/SparseDataVector.h +++ b/libraries/data/include/SparseDataVector.h @@ -220,6 +220,158 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/SparseDataVector.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace data +{ + + template + SparseDataVectorIterator::SparseDataVectorIterator( + const IndexIteratorType& index_iterator, + const ValueIteratorType& valueIterator, + size_t size) : + _indexIterator(index_iterator), + _valueIterator(valueIterator), + _size(size) + { + } + template + bool SparseDataVectorIterator::IsValid() const + { + return _indexIterator.IsValid() && _indexIterator.Get() < _size; + } + + template + void SparseDataVectorIterator::Next() + { + _indexIterator.Next(); + ++_valueIterator; + } + + template + IndexValue SparseDataVectorIterator::Get() const + { + return 
IndexValue{ _indexIterator.Get(), static_cast(*_valueIterator) }; + } + + template + void SparseDataVectorIterator::Next() + { + if (_index == _iteratorIndex) + { + _indexIterator.Next(); + ++_valueIterator; + if (_indexIterator.IsValid() && _indexIterator.Get() < _size) + { + _iteratorIndex = _indexIterator.Get(); + } + else + { + _iteratorIndex = _size; + } + } + ++_index; + } + + template + IndexValue SparseDataVectorIterator::Get() const + { + if (_index == _iteratorIndex) + { + return IndexValue{ _index, static_cast(*_valueIterator) }; + } + return IndexValue{ _index, 0.0 }; + } + + template + SparseDataVectorIterator::SparseDataVectorIterator(const IndexIteratorType& indexIterator, const ValueIteratorType& valueIterator, size_t size) : + _indexIterator(indexIterator), + _valueIterator(valueIterator), + _size(size) + { + _iteratorIndex = _indexIterator.IsValid() ? _indexIterator.Get() : _size; + } + + template + template Concept> + SparseDataVector::SparseDataVector(SparseIteratorType SparseIterator) + { + AppendElements(std::move(SparseIterator)); + } + + template + template + auto SparseDataVector::GetIterator(size_t size) const -> Iterator + { + return Iterator(_indexList.GetIterator(), _values.cbegin(), size); + } + + template + SparseDataVector::SparseDataVector(std::initializer_list list) + { + AppendElements(std::move(list)); + } + + template + SparseDataVector::SparseDataVector(std::initializer_list list) + { + AppendElements(std::move(list)); + } + + template + SparseDataVector::SparseDataVector(std::vector vec) + { + AppendElements(std::move(vec)); + } + + template + SparseDataVector::SparseDataVector(std::vector vec) + { + AppendElements(std::move(vec)); + } + + template + void SparseDataVector::AppendElement(size_t index, double value) + { + if (value == 0) + { + return; + } + + ElementType storedValue = static_cast(value); + assert(storedValue - value <= 1.0e-6 && value - storedValue <= 1.0e-6); + + if (_indexList.Size() > 0) + { + if (index <= _indexList.Max()) + { + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Can only append values to the end of a data vector"); + } + } + + _indexList.Append(index); + _values.push_back(storedValue); + } + + template + size_t SparseDataVector::PrefixLength() const + { + if (_indexList.Size() == 0) + { + return 0; + } + else + { + return _indexList.Max() + 1; + } + } +} // namespace data +} // namespace ell + +#pragma endregion implementation #endif // SPARSEDATAVECTOR_H diff --git a/libraries/data/include/StlIndexValueIterator.h b/libraries/data/include/StlIndexValueIterator.h index 32d398287..9a60ca62a 100644 --- a/libraries/data/include/StlIndexValueIterator.h +++ b/libraries/data/include/StlIndexValueIterator.h @@ -116,4 +116,80 @@ namespace data } // namespace data } // namespace ell -#include "../tcc/StlIndexValueIterator.tcc" +#pragma region implementation + +namespace ell +{ +namespace data +{ + template + StlIndexValueIterator::StlIndexValueIterator(const IteratorType& begin, const IteratorType& end, size_t size) : + _current(begin), + _end(end), + _size(size), + _index(0) + { + SkipZeros(); + } + + template + void StlIndexValueIterator::Next() + { + ++_current; + ++_index; + SkipZeros(); + } + + template + void StlIndexValueIterator::SkipZeros() + { + while (_current < _end && *_current == 0) + { + ++_current; + ++_index; + } + } + + template + StlIndexValueIterator::StlIndexValueIterator(const IteratorType& begin, const IteratorType& end, size_t size) : + _current(begin), + _end(end), + 
+        _size(size)
+    {
+    }
+
+    template <typename IteratorType>
+    void StlIndexValueIterator<IterationPolicy::all, IteratorType>::Next()
+    {
+        ++_index;
+        if (_current < _end)
+        {
+            ++_current;
+        }
+    }
+
+    template <typename IteratorType>
+    IndexValue StlIndexValueIterator<IterationPolicy::all, IteratorType>::Get() const
+    {
+        return _current < _end ? IndexValue{ _index, (double)*_current } : IndexValue{ _index, 0.0 };
+    }
+
+    //
+    // Convenience function to create iterator
+    //
+
+    template <IterationPolicy policy, typename ElementType>
+    VectorIndexValueIterator<policy, ElementType> MakeVectorIndexValueIterator(const std::vector<ElementType>& vector)
+    {
+        return VectorIndexValueIterator<policy, ElementType>(vector.cbegin(), vector.cend(), vector.size());
+    }
+
+    template <IterationPolicy policy, typename ElementType>
+    VectorIndexValueIterator<policy, ElementType> MakeVectorIndexValueIterator(const std::vector<ElementType>& vector, size_t size)
+    {
+        return VectorIndexValueIterator<policy, ElementType>(vector.cbegin(), vector.cend(), size);
+    }
+} // namespace data
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/data/include/TextLine.h b/libraries/data/include/TextLine.h
index 151afe8c3..84fb59cc7 100644
--- a/libraries/data/include/TextLine.h
+++ b/libraries/data/include/TextLine.h
@@ -90,4 +90,41 @@ namespace data
 } // namespace data
 } // namespace ell
 
-#include "../tcc/TextLine.tcc"
\ No newline at end of file
+#pragma region implementation
+
+#include
+#include
+
+namespace ell
+{
+namespace data
+{
+    template <typename ValueType>
+    void TextLine::ParseAdvance(ValueType& value)
+    {
+        auto result = utilities::Parse(_currentChar, value);
+        if (result != utilities::ParseResult::success)
+        {
+            throw utilities::DataFormatException(utilities::DataFormatErrors::badFormat, "could not parse value");
+        }
+    }
+
+    template <typename ValueType>
+    size_t TextLine::TryParse(ValueType& value) const
+    {
+        auto temp = _currentChar;
+        auto result = utilities::Parse(temp, value);
+        if (result == utilities::ParseResult::success)
+        {
+            auto stepSize = static_cast<size_t>(temp - _currentChar);
+            return stepSize;
+        }
+        else
+        {
+            return 0;
+        }
+    }
+} // namespace data
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/data/include/TransformedDataVector.h b/libraries/data/include/TransformedDataVector.h
index 441da5fa7..cc81e3162 100644
--- a/libraries/data/include/TransformedDataVector.h
+++ b/libraries/data/include/TransformedDataVector.h
@@ -64,4 +64,31 @@ namespace data
 } // namespace data
 } // namespace ell
 
-#include "../tcc/TransformedDataVector.tcc"
\ No newline at end of file
+#pragma region implementation
+
+namespace
ell +{ +namespace data +{ + template + TransformingIndexValueIterator::TransformingIndexValueIterator(WrappedIndexValueIteratorType wrappedIterator, TransformationType transform) : + _wrappedIterator(std::move(wrappedIterator)), + _transform(std::move(transform)) + { + } + + template + IndexValue TransformingIndexValueIterator::Get() const + { + auto indexValue = _wrappedIterator.Get(); + return { indexValue.index, _transform(indexValue) }; + } +} // namespace data +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/data/tcc/AutoDataVector.tcc b/libraries/data/tcc/AutoDataVector.tcc deleted file mode 100644 index 007c014d1..000000000 --- a/libraries/data/tcc/AutoDataVector.tcc +++ /dev/null @@ -1,202 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: AutoDataVectorBase.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace data -{ - template - AutoDataVectorBase::AutoDataVectorBase(DefaultDataVectorType&& vector) - { - FindBestRepresentation(std::move(vector)); - } - - template - template Concept> - AutoDataVectorBase::AutoDataVectorBase(IndexValueIteratorType indexValueIterator) - { - DefaultDataVectorType defaultDataVector(std::move(indexValueIterator)); - FindBestRepresentation(std::move(defaultDataVector)); - } - - template - AutoDataVectorBase::AutoDataVectorBase(std::initializer_list list) - { - DefaultDataVectorType defaultDataVector(std::move(list)); - FindBestRepresentation(std::move(defaultDataVector)); - } - - template - AutoDataVectorBase::AutoDataVectorBase(std::initializer_list list) - { - DefaultDataVectorType defaultDataVector(std::move(list)); - FindBestRepresentation(std::move(defaultDataVector)); - } - - template - AutoDataVectorBase::AutoDataVectorBase(std::vector vec) - { - DefaultDataVectorType defaultDataVector(std::move(vec)); - FindBestRepresentation(std::move(defaultDataVector)); - } - - template - AutoDataVectorBase::AutoDataVectorBase(std::vector vec) - { - DefaultDataVectorType defaultDataVector(std::move(vec)); - FindBestRepresentation(std::move(defaultDataVector)); - } - - template - void AutoDataVectorBase::AppendElement(size_t /*index*/, double /*value*/) - { - throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Append element not supported for AutoDataVector"); - } - - template - double AutoDataVectorBase::Dot(math::UnorientedConstVectorBase vector) const - { - return _pInternal->Dot(vector); - } - - template - float AutoDataVectorBase::Dot(math::UnorientedConstVectorBase vector) const - { - return _pInternal->Dot(vector); - } - - template - void AutoDataVectorBase::AddTo(math::RowVectorReference vector) const - { - _pInternal->AddTo(vector); - } - - template - std::vector AutoDataVectorBase::ToArray(size_t size) const - { - return _pInternal->ToArray(size); - } - - template - void AutoDataVectorBase::Print(std::ostream& os) const - { - _pInternal->Print(os); - } - - template - template - void AutoDataVectorBase::AddTransformedTo(math::RowVectorReference vector, TransformationType transformation) const - { - _pInternal->AddTransformedTo(vector, transformation); - } - - template - template - ReturnType AutoDataVectorBase::CopyAs(ArgTypes... 
args) const - { - return _pInternal->CopyAs(args...); - } - - template - template - ReturnType AutoDataVectorBase::TransformAs(ArgTypes... args) const - { - return _pInternal->TransformAs(args...); - } - - template - bool DoesCastModifyValue(double value) - { - double target = static_cast(static_cast(value)); - return (target - value > APPROXIMATION_TOLERANCE) || (value - target > APPROXIMATION_TOLERANCE); - } - - template - void AutoDataVectorBase::FindBestRepresentation(DefaultDataVectorType defaultDataVector) - { - size_t numNonZeros = 0; - bool includesNonFloats = false; - bool includesNonShorts = false; - bool includesNonBytes = false; - bool includesNonBinary = false; - - auto iter = GetIterator(defaultDataVector); - while (iter.IsValid()) - { - double value = iter.Get().value; - - ++numNonZeros; - includesNonFloats |= DoesCastModifyValue(value); - includesNonShorts |= DoesCastModifyValue(value); - includesNonBytes |= DoesCastModifyValue(value); - includesNonBinary |= (value != 1 && value != 0); - - iter.Next(); - } - - // dense - if (numNonZeros > SPARSE_THRESHOLD * defaultDataVector.PrefixLength()) - { - if (includesNonFloats) - { - SetInternal(std::move(defaultDataVector)); - } - else if (includesNonShorts) - { - SetInternal(std::move(defaultDataVector)); - } - else if (includesNonBytes) - { - SetInternal(std::move(defaultDataVector)); - } - else - { - SetInternal(std::move(defaultDataVector)); - } - } - - // sparse - else - { - if (includesNonFloats) - { - SetInternal(std::move(defaultDataVector)); - } - else if (includesNonShorts) - { - SetInternal(std::move(defaultDataVector)); - } - else if (includesNonBytes) - { - SetInternal(std::move(defaultDataVector)); - } - else if (includesNonBinary) - { - SetInternal(std::move(defaultDataVector)); - } - else - { - SetInternal(std::move(defaultDataVector)); - } - } - } - - template - template Concept> - void AutoDataVectorBase::SetInternal(DefaultDataVectorType defaultDataVector) - { - _pInternal = std::make_unique(GetIterator(defaultDataVector)); - } - - template - AutoDataVector AutoDataVectorParser::Parse(TextLine& textLine) - { - return AutoDataVector(IndexValueParsingIterator(textLine)); - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/DataVector.tcc b/libraries/data/tcc/DataVector.tcc deleted file mode 100644 index 41eb304b7..000000000 --- a/libraries/data/tcc/DataVector.tcc +++ /dev/null @@ -1,367 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: DataVector.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include "../include/DenseDataVector.h" -#include "../include/SparseBinaryDataVector.h" -#include "../include/SparseDataVector.h" -#include "../include/TransformingIndexValueIterator.h" - -namespace ell -{ -namespace data -{ - template - ReturnType IDataVector::InvokeWithThis(GenericLambdaType lambda) const - { - auto type = GetType(); - switch (type) - { - case Type::DoubleDataVector: - return lambda(static_cast(this)); - - case Type::FloatDataVector: - return lambda(static_cast(this)); - - case Type::ShortDataVector: - return lambda(static_cast(this)); - - case Type::ByteDataVector: - return lambda(static_cast(this)); - - case Type::SparseDoubleDataVector: - return lambda(static_cast(this)); - - case Type::SparseFloatDataVector: - return lambda(static_cast(this)); - - case 
Type::SparseShortDataVector: - return lambda(static_cast(this)); - - case Type::SparseByteDataVector: - return lambda(static_cast(this)); - - case Type::SparseBinaryDataVector: - return lambda(static_cast(this)); - - default: - throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "attempted to cast unsupported data vector type"); - } - } - - template - void IDataVector::AddTransformedTo(math::RowVectorReference vector, TransformationType transformation) const - { - InvokeWithThis([vector, transformation](const auto* pThis) { - pThis->template AddTransformedTo(vector, transformation); - }); - } - - template - ReturnType IDataVector::CopyAs() const - { - return InvokeWithThis([](const auto* pThis) { - return ReturnType(pThis->template GetIterator()); - }); - } - - template - ReturnType IDataVector::TransformAs(TransformationType transformation, size_t size) const - { - return InvokeWithThis([transformation, size](const auto* pThis) { - return ReturnType(MakeTransformingIndexValueIterator(pThis->template GetIterator(size), transformation)); - }); - } - - template - ReturnType IDataVector::TransformAs(TransformationType transformation) const - { - return InvokeWithThis([transformation](const auto* pThis) { - return ReturnType(MakeTransformingIndexValueIterator(pThis->template GetIterator(), transformation)); - }); - } - - template - template Concept> - void DataVectorBase::AppendElements(IndexValueIteratorType indexValueIterator) - { - while (indexValueIterator.IsValid()) - { - auto current = indexValueIterator.Get(); - static_cast(this)->AppendElement(current.index, current.value); - indexValueIterator.Next(); - } - } - - template - void DataVectorBase::AppendElements(std::initializer_list list) - { - for (const auto& current : list) - { - static_cast(this)->AppendElement(current.index, current.value); - } - } - - template - void DataVectorBase::AppendElements(std::initializer_list list) - { - size_t index = 0; - for (double current : list) - { - static_cast(this)->AppendElement(index++, current); - } - } - - template - void DataVectorBase::AppendElements(std::vector vec) - { - for (const auto& current : vec) - { - static_cast(this)->AppendElement(current.index, current.value); - } - } - - template - void DataVectorBase::AppendElements(const std::vector& vec) - { - size_t index = 0; - for (double current : vec) - { - static_cast(this)->AppendElement(index++, current); - } - } - - template - void DataVectorBase::AppendElements(const std::vector& vec) - { - size_t index = 0; - for (float current : vec) - { - static_cast(this)->AppendElement(index++, current); - } - } - - template - double DataVectorBase::Norm2Squared() const - { - auto iter = GetIterator(*static_cast(this)); - - double result = 0.0; - while (iter.IsValid()) - { - double value = iter.Get().value; - result += value * value; - iter.Next(); - } - return result; - } - - template - double DataVectorBase::Dot(math::UnorientedConstVectorBase vector) const - { - auto indexValueIterator = GetIterator(*static_cast(this)); - - double result = 0.0; - auto size = vector.Size(); - while (indexValueIterator.IsValid()) - { - auto indexValue = indexValueIterator.Get(); - if (indexValue.index >= size) - { - break; - } - result += indexValue.value * vector[indexValue.index]; - indexValueIterator.Next(); - } - return result; - } - - template - float DataVectorBase::Dot(math::UnorientedConstVectorBase vector) const - { - auto indexValueIterator = GetIterator(*static_cast(this)); - - float result = 0.0; - auto size = 
vector.Size(); - while (indexValueIterator.IsValid()) - { - auto indexValue = indexValueIterator.Get(); - if (indexValue.index >= size) - { - break; - } - result += static_cast(indexValue.value) * vector[indexValue.index]; - indexValueIterator.Next(); - } - return result; - } - - template - void DataVectorBase::AddTo(math::RowVectorReference vector) const - { - auto indexValueIterator = GetIterator(*static_cast(this)); - - auto size = vector.Size(); - while (indexValueIterator.IsValid()) - { - auto indexValue = indexValueIterator.Get(); - if (indexValue.index >= size) - { - return; - } - vector[indexValue.index] += indexValue.value; - indexValueIterator.Next(); - } - } - - template - std::vector DataVectorBase::ToArray(size_t size) const - { - std::vector result(size); - auto indexValueIterator = GetIterator(*static_cast(this)); - - while (indexValueIterator.IsValid()) - { - auto indexValue = indexValueIterator.Get(); - if (indexValue.index >= size) - { - break; - } - result[indexValue.index] = indexValue.value; - indexValueIterator.Next(); - } - - return result; - } - - template - template - void DataVectorBase::AddTransformedTo(math::RowVectorReference vector, TransformationType transformation) const - { - auto size = vector.Size(); - auto indexValueIterator = GetIterator(*static_cast(this), size); - - while (indexValueIterator.IsValid()) - { - auto indexValue = indexValueIterator.Get(); - if (indexValue.index >= size) - { - return; - } - double result = transformation(indexValue); - vector[indexValue.index] += result; - indexValueIterator.Next(); - } - } - - template - template - ReturnType DataVectorBase::CopyAs() const - { - return ReturnType(GetIterator(*static_cast(this))); - } - - template - template - ReturnType DataVectorBase::TransformAs(TransformationType transformation, size_t size) const - { - return ReturnType(MakeTransformingIndexValueIterator(GetIterator(*static_cast(this), size), std::move(transformation))); - } - - template - template - ReturnType DataVectorBase::TransformAs(TransformationType transformation) const - { - return ReturnType(MakeTransformingIndexValueIterator(GetIterator(*static_cast(this)), std::move(transformation))); - } - - template - void DataVectorBase::Print(std::ostream& os) const - { - auto indexValueIterator = GetIterator(*static_cast(this)); - if (indexValueIterator.IsValid()) - { - auto indexValue = indexValueIterator.Get(); - os << indexValue.index << ":" << indexValue.value; - indexValueIterator.Next(); - } - - while (indexValueIterator.IsValid()) - { - auto indexValue = indexValueIterator.Get(); - os << '\t' << indexValue.index << ":" << indexValue.value; - indexValueIterator.Next(); - } - } - - template - static void AddTransformedTo(const DataVectorType& dataVector, math::RowVectorReference vector, TransformationType transformation) - { - return dataVector.template AddTransformedTo(vector, transformation); - } - - template - static auto GetIterator(DataVectorType& vector) - { - return vector.template GetIterator(); - } - - template - static auto GetIterator(const DataVectorType& vector) - { - return vector.template GetIterator(); - } - - template - static auto GetIterator(DataVectorType& vector, size_t size) - { - return vector.template GetIterator(size); - } - - template - static auto GetIterator(const DataVectorType& vector, size_t size) - { - return vector.template GetIterator(size); - } - - template - static ReturnType CopyAs(DataVectorType& vector) - { - return vector.template CopyAs(); - } - - template - static ReturnType CopyAs(const 
DataVectorType& vector) - { - return vector.template CopyAs(); - } - - template - static ReturnType TransformAs(DataVectorType& vector, TransformationType transformation, size_t size) - { - return vector.template TransformAs(transformation, size); - } - - template - static ReturnType TransformAs(const DataVectorType& vector, TransformationType transformation, size_t size) - { - return vector.template TransformAs(transformation, size); - } - - template - static ReturnType TransformAs(DataVectorType& vector, TransformationType transformation) - { - return vector.template TransformAs(transformation); - } - - template - static ReturnType TransformAs(const DataVectorType& vector, TransformationType transformation) - { - return vector.template TransformAs(transformation); - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/DataVectorOperations.tcc b/libraries/data/tcc/DataVectorOperations.tcc deleted file mode 100644 index 44fe00232..000000000 --- a/libraries/data/tcc/DataVectorOperations.tcc +++ /dev/null @@ -1,55 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: DataVectorOperations.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace data -{ - template Concept> - auto operator*(double scalar, const DataVectorType& vector) - { - return MakeTransformedDataVector(vector, [scalar](IndexValue x) { return scalar * x.value; }); - } - - template Concept> - auto operator*(const DataVectorType& vector, double scalar) - { - return scalar * vector; - } - - template - ElementType operator*(math::UnorientedConstVectorBase vector, const IDataVector& dataVector) - { - return dataVector.Dot(vector); - } - - template - auto Square(const DataVectorType& vector) - { - return MakeTransformedDataVector(vector, [](IndexValue x) { return x.value * x.value; }); - } - - template - auto Sqrt(const DataVectorType& vector) - { - return MakeTransformedDataVector(vector, [](IndexValue x) { return std::sqrt(x.value); }); - } - - template - auto Abs(const DataVectorType& vector) - { - return MakeTransformedDataVector(vector, [](IndexValue x) { return std::abs(x.value); }); - } - - template - auto ZeroIndicator(const DataVectorType& vector) - { - return MakeTransformedDataVector(vector, [](IndexValue x) { return x.value == 0.0 ? 
1.0 : 0.0; }); - } -} // namespace data -} // namespace ell \ No newline at end of file diff --git a/libraries/data/tcc/Dataset.tcc b/libraries/data/tcc/Dataset.tcc deleted file mode 100644 index 1becd80b9..000000000 --- a/libraries/data/tcc/Dataset.tcc +++ /dev/null @@ -1,237 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Dataset.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -#include -#include -#include - -namespace ell -{ -namespace data -{ - using namespace logging; - - template - ExampleIterator AnyDataset::GetExampleIterator() const - { - auto fromIndex = _fromIndex; - auto size = _size; - auto getExampleIterator = [fromIndex, size](const auto* pDataset) { return pDataset->template GetExampleIterator(fromIndex, size); }; - - // all Dataset types for which GetAnyDataset() is called must be listed below, in the variadic template argument. - using Invoker = utilities::AbstractInvoker, - Dataset>; - - return Invoker::Invoke>(getExampleIterator, _pDataset); - } - - template - template - Dataset::DatasetExampleIterator::DatasetExampleIterator(InternalIteratorType begin, InternalIteratorType end) : - _current(begin), - _end(end) - { - } - - template - Dataset::Dataset(ExampleIterator exampleIterator) - { - while (exampleIterator.IsValid()) - { - AddExample(exampleIterator.Get()); - exampleIterator.Next(); - } - } - - template - Dataset::Dataset(const AnyDataset& anyDataset) : - Dataset(anyDataset.GetExampleIterator()) - { - } - - template - void Dataset::Swap(Dataset& other) - { - std::swap(_examples, other._examples); - std::swap(_numFeatures, other._numFeatures); - } - - template - DatasetExampleType& Dataset::GetExample(size_t index) - { - return _examples[index]; - } - - template - const DatasetExampleType& Dataset::GetExample(size_t index) const - { - return _examples[index]; - } - - template - DatasetExampleType& Dataset::operator[](size_t index) - { - return _examples[index]; - } - - template - const DatasetExampleType& Dataset::operator[](size_t index) const - { - return _examples[index]; - } - - template - template - ExampleIterator Dataset::GetExampleIterator(size_t fromIndex, size_t size) const - { - size = CorrectRangeSize(fromIndex, size); - return ExampleIterator(std::make_unique>(_examples.cbegin() + fromIndex, _examples.cbegin() + fromIndex + size)); - } - - template - auto Dataset::GetExampleReferenceIterator(size_t fromIndex, size_t size) const -> ExampleReferenceIterator - { - size = CorrectRangeSize(fromIndex, size); - return ExampleReferenceIterator(_examples.cbegin() + fromIndex, _examples.cbegin() + fromIndex + size); - } - - template - void Dataset::AddExample(DatasetExampleType example) - { - size_t numFeatures = example.GetDataVector().PrefixLength(); - _examples.push_back(std::move(example)); - - if (_numFeatures < numFeatures) - { - _numFeatures = numFeatures; - } - } - - template - template - Dataset Dataset::Transform(std::function transformationFunction) - { - Dataset dataset; - for (auto& example : _examples) - { - dataset.AddExample(transformationFunction(example)); - } - return dataset; - } - - template - void Dataset::Reset() - { - _examples.clear(); - _numFeatures = 0; - } - - template - void Dataset::RandomPermute(std::default_random_engine& rng, size_t prefixSize) - { - prefixSize = CorrectRangeSize(0, prefixSize); - 
for (size_t i = 0; i < prefixSize; ++i) - { - RandomSwap(rng, i, i, _examples.size() - i); - } - } - - template - void Dataset::RandomPermute(std::default_random_engine& rng, size_t rangeFirstIndex, size_t rangeSize, size_t prefixSize) - { - rangeSize = CorrectRangeSize(rangeFirstIndex, rangeSize); - - if (prefixSize > rangeSize || prefixSize == 0) - { - prefixSize = rangeSize; - } - - for (size_t s = 0; s < prefixSize; ++s) - { - size_t index = rangeFirstIndex + s; - RandomSwap(rng, index, index, rangeSize - s); - } - } - - template - void Dataset::RandomSwap(std::default_random_engine& rng, size_t targetExampleIndex, size_t rangeFirstIndex, size_t rangeSize) - { - using std::swap; - rangeSize = CorrectRangeSize(rangeFirstIndex, rangeSize); - if (targetExampleIndex > _examples.size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange); - } - - std::uniform_int_distribution dist(rangeFirstIndex, rangeFirstIndex + rangeSize - 1); - size_t j = dist(rng); - swap(_examples[targetExampleIndex], _examples[j]); - } - - template - template - void Dataset::Sort(SortKeyType sortKey, size_t fromIndex, size_t size) - { - size = CorrectRangeSize(fromIndex, size); - - std::sort(_examples.begin() + fromIndex, - _examples.begin() + fromIndex + size, - [&](const DatasetExampleType& a, const DatasetExampleType& b) -> bool { - return sortKey(a) < sortKey(b); - }); - } - - template - template - void Dataset::Partition(PartitionKeyType partitionKey, size_t fromIndex, size_t size) - { - size = CorrectRangeSize(fromIndex, size); - std::partition(_examples.begin() + fromIndex, _examples.begin() + fromIndex + size, partitionKey); - } - - template - void Dataset::Print(std::ostream& os, size_t tabs, size_t fromIndex, size_t size) const - { - size = CorrectRangeSize(fromIndex, size); - - for (size_t index = fromIndex; index < fromIndex + size; ++index) - { - os << std::string(tabs * 4, ' '); - _examples[index].Print(os); - os << EOL; - } - } - - template - std::ostream& operator<<(std::ostream& os, const Dataset& dataset) - { - dataset.Print(os); - return os; - } - - template - size_t Dataset::CorrectRangeSize(size_t fromIndex, size_t size) const - { - if (size == 0 || fromIndex + size > _examples.size()) - { - return _examples.size() - fromIndex; - } - return size; - } - - template - Dataset MakeDataset(ExampleIterator exampleIterator) - { - return Dataset(std::move(exampleIterator)); - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/DenseDataVector.tcc b/libraries/data/tcc/DenseDataVector.tcc deleted file mode 100644 index 353e45388..000000000 --- a/libraries/data/tcc/DenseDataVector.tcc +++ /dev/null @@ -1,113 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: DenseDataVector.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include - -#include - -namespace ell -{ -namespace data -{ - template - DenseDataVector::DenseDataVector() : - _numNonzeros(0) - { - _data.reserve(DEFAULT_DENSE_VECTOR_CAPACITY); - } - - template - template Concept> - DenseDataVector::DenseDataVector(IndexValueIteratorType indexValueIterator) - { - AppendElements(std::move(indexValueIterator)); - } - - template - DenseDataVector::DenseDataVector(std::initializer_list list) - { - AppendElements(std::move(list)); - } - - template - 
DenseDataVector::DenseDataVector(std::initializer_list list) - { - AppendElements(std::move(list)); - } - - template - DenseDataVector::DenseDataVector(std::vector list) - { - AppendElements(std::move(list)); - } - - template - DenseDataVector::DenseDataVector(std::vector list) - { - AppendElements(std::move(list)); - } - - template - DenseDataVector::DenseDataVector(std::vector list) - { - AppendElements(std::move(list)); - } - - template - double DenseDataVector::operator[](size_t index) const - { - if (index >= _data.size()) - { - return 0.0; - } - return static_cast(_data[index]); - } - - template - template - VectorIndexValueIterator DenseDataVector::GetIterator(size_t size) const - { - return MakeVectorIndexValueIterator(_data, size); - } - - template // move this to datavectorbase? - template - VectorIndexValueIterator DenseDataVector::GetIterator() const - { - return GetIterator(PrefixLength()); - } - - template - void DenseDataVector::AppendElement(size_t index, double value) - { - if (value == 0) - { - return; - } - - ElementType storedValue = static_cast(value); - - if (storedValue - value > 1.0e-5 || value - storedValue > 1.0e-5) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - utilities::FormatString("Data loss detected when storing value %f as type %s", value, utilities::GetTypeName().c_str())); - } - - if (index < _data.size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Can only append values to the end of a data vector"); - } - - _data.resize(index + 1); - _data[index] = storedValue; - ++_numNonzeros; - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/Example.tcc b/libraries/data/tcc/Example.tcc deleted file mode 100644 index 641e22844..000000000 --- a/libraries/data/tcc/Example.tcc +++ /dev/null @@ -1,60 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Example.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace data -{ - template - Example::Example(DataVectorType dataVector, MetadataType metadata) : - _dataVector(std::make_shared(std::move(dataVector))), - _metadata(std::move(metadata)) - { - } - - template - Example::Example(const std::shared_ptr& dataVector, const MetadataType& metadata) : - _dataVector(dataVector), - _metadata(metadata) - { - } - - template - template Concept> - TargetExampleType Example::CopyAs() const - { - // shallow copy of data vector - return TargetExampleType(_dataVector, typename TargetExampleType::MetadataType(_metadata)); - } - - template - template > - TargetExampleType Example::CopyAs() const - { - // deep copy of data vector - using DataType = typename TargetExampleType::DataVectorType; - using TargetMetadataType = typename TargetExampleType::MetadataType; - return TargetExampleType(std::make_shared(_dataVector->template CopyAs()), TargetMetadataType(_metadata)); - } - - template - void Example::Print(std::ostream& os) const - { - os << _metadata; - os << "\t"; - _dataVector->Print(os); - } - - template - std::ostream& operator<<(std::ostream& ostream, const Example& example) - { - example.Print(ostream); - return ostream; - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/ExampleIterator.tcc b/libraries/data/tcc/ExampleIterator.tcc deleted file mode 100644 index 
e9c9487e1..000000000 --- a/libraries/data/tcc/ExampleIterator.tcc +++ /dev/null @@ -1,19 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ExampleIterator.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace data -{ - template - ExampleIterator::ExampleIterator(std::unique_ptr>&& iterator) : - _iterator(std::move(iterator)) - { - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/SingleLineParsingExampleIterator.tcc b/libraries/data/tcc/SingleLineParsingExampleIterator.tcc deleted file mode 100644 index a5550ca29..000000000 --- a/libraries/data/tcc/SingleLineParsingExampleIterator.tcc +++ /dev/null @@ -1,70 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SingleLineParsingExampleIterator.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace data -{ - template - SingleLineParsingExampleIterator::SingleLineParsingExampleIterator(TextLineIteratorType textLineIterator, MetadataParserType metadataParser, DataVectorParserType dataVectorParser) : - _textLineIterator(std::move(textLineIterator)), - _metadataParser(std::move(metadataParser)), - _dataVectorParser(std::move(dataVectorParser)) - { - if (_textLineIterator.IsValid()) - { - ReadExample(); - } - } - - template - void SingleLineParsingExampleIterator::Next() - { - _textLineIterator.Next(); - ReadExample(); - } - - template - void SingleLineParsingExampleIterator::ReadExample() - { - // get a line - skip lines that contain just whitespace or just a comment - TextLine line = _textLineIterator.GetTextLine(); - line.TrimLeadingWhitespace(); - - while (line.IsEndOfContent()) - { - _textLineIterator.Next(); - if (!_textLineIterator.IsValid()) - { - return; - } - - line = _textLineIterator.GetTextLine(); - line.TrimLeadingWhitespace(); - } - - // parse metadata - auto metaData = _metadataParser.Parse(line); - - // parse datavector - auto dataVector = _dataVectorParser.Parse(line); - - // cache the parsed example - _currentExample = ExampleType(std::move(dataVector), std::move(metaData)); - } - - template - auto MakeSingleLineParsingExampleIterator(TextLineIteratorType textLineIterator, MetadataParserType metadataParser, DataVectorParserType dataVectorParser) - { - using ExampleType = ParserExample; - using IteratorType = SingleLineParsingExampleIterator; - auto iterator = std::make_unique(std::move(textLineIterator), std::move(metadataParser), std::move(dataVectorParser)); - return ExampleIterator(std::move(iterator)); - } -} // namespace data -} // namespace ell \ No newline at end of file diff --git a/libraries/data/tcc/SparseBinaryDataVector.tcc b/libraries/data/tcc/SparseBinaryDataVector.tcc deleted file mode 100644 index 4e49d0d57..000000000 --- a/libraries/data/tcc/SparseBinaryDataVector.tcc +++ /dev/null @@ -1,155 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SparseBinaryDataVector.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - 
-#include - -namespace ell -{ -namespace data -{ - template - SparseBinaryDataVectorIterator::SparseBinaryDataVectorIterator(const IndexIteratorType& listIterator, size_t size) : - _indexIterator(listIterator), - _size(size) - { - } - - template - void SparseBinaryDataVectorIterator::Next() - { - if (_index == _iteratorIndex) - { - _indexIterator.Next(); - _iteratorIndex = _indexIterator.IsValid() ? _indexIterator.Get() : _size; - } - ++_index; - } - - template - IndexValue SparseBinaryDataVectorIterator::Get() const - { - return _index == _iteratorIndex ? IndexValue{ _index, 1.0 } : IndexValue{ _index, 0.0 }; - } - - template - SparseBinaryDataVectorIterator::SparseBinaryDataVectorIterator(const IndexIteratorType& listIterator, size_t size) : - _indexIterator(listIterator), - _size(size) - { - _iteratorIndex = _indexIterator.IsValid() ? _indexIterator.Get() : _size; - } - - template - template Concept> - SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(IndexValueIteratorType indexValueIterator) - { - AppendElements(std::move(indexValueIterator)); - } - - template - template - auto SparseBinaryDataVectorBase::GetIterator(size_t size) const -> Iterator - { - return Iterator(_indexList.GetIterator(), size); - } - - template - template - auto SparseBinaryDataVectorBase::GetIterator() const -> Iterator - { - return GetIterator(PrefixLength()); - } - - template - SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(std::initializer_list list) - { - AppendElements(std::move(list)); - } - - template - SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(std::initializer_list list) - { - AppendElements(std::move(list)); - } - - template - SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(std::vector vec) - { - AppendElements(std::move(vec)); - } - - template - SparseBinaryDataVectorBase::SparseBinaryDataVectorBase(std::vector vec) - { - AppendElements(std::move(vec)); - } - - template - void SparseBinaryDataVectorBase::AppendElement(size_t index, double value) - { - if (value == 0) - { - return; - } - - if (value != 1) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Input to BinaryDataVector needs to be 0 or 1"); - } - - _indexList.Append(index); - } - - template - size_t SparseBinaryDataVectorBase::PrefixLength() const - { - if (_indexList.Size() == 0) - { - return 0; - } - else - { - return _indexList.Max() + 1; - } - } - - template - double SparseBinaryDataVectorBase::Dot(math::UnorientedConstVectorBase vector) const - { - double value = 0.0; - - auto iter = _indexList.GetIterator(); - while (iter.IsValid()) - { - value += vector[iter.Get()]; - iter.Next(); - } - - return value; - } - - template - void SparseBinaryDataVectorBase::AddTo(math::RowVectorReference vector) const - { - auto iter = _indexList.GetIterator(); - auto size = vector.Size(); - - while (iter.IsValid()) - { - auto index = iter.Get(); - if (index >= size) - { - return; - } - - vector[index] += 1.0; - iter.Next(); - } - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/SparseDataVector.tcc b/libraries/data/tcc/SparseDataVector.tcc deleted file mode 100644 index 671e4a7e8..000000000 --- a/libraries/data/tcc/SparseDataVector.tcc +++ /dev/null @@ -1,157 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SparseDataVector.tcc (data) -// Authors: Ofer Dekel -// 
-//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace data -{ - - template - SparseDataVectorIterator::SparseDataVectorIterator( - const IndexIteratorType& index_iterator, - const ValueIteratorType& valueIterator, - size_t size) : - _indexIterator(index_iterator), - _valueIterator(valueIterator), - _size(size) - { - } - template - bool SparseDataVectorIterator::IsValid() const - { - return _indexIterator.IsValid() && _indexIterator.Get() < _size; - } - - template - void SparseDataVectorIterator::Next() - { - _indexIterator.Next(); - ++_valueIterator; - } - - template - IndexValue SparseDataVectorIterator::Get() const - { - return IndexValue{ _indexIterator.Get(), static_cast(*_valueIterator) }; - } - - template - void SparseDataVectorIterator::Next() - { - if (_index == _iteratorIndex) - { - _indexIterator.Next(); - ++_valueIterator; - if (_indexIterator.IsValid() && _indexIterator.Get() < _size) - { - _iteratorIndex = _indexIterator.Get(); - } - else - { - _iteratorIndex = _size; - } - } - ++_index; - } - - template - IndexValue SparseDataVectorIterator::Get() const - { - if (_index == _iteratorIndex) - { - return IndexValue{ _index, static_cast(*_valueIterator) }; - } - return IndexValue{ _index, 0.0 }; - } - - template - SparseDataVectorIterator::SparseDataVectorIterator(const IndexIteratorType& indexIterator, const ValueIteratorType& valueIterator, size_t size) : - _indexIterator(indexIterator), - _valueIterator(valueIterator), - _size(size) - { - _iteratorIndex = _indexIterator.IsValid() ? _indexIterator.Get() : _size; - } - - template - template Concept> - SparseDataVector::SparseDataVector(SparseIteratorType SparseIterator) - { - AppendElements(std::move(SparseIterator)); - } - - template - template - auto SparseDataVector::GetIterator(size_t size) const -> Iterator - { - return Iterator(_indexList.GetIterator(), _values.cbegin(), size); - } - - template - SparseDataVector::SparseDataVector(std::initializer_list list) - { - AppendElements(std::move(list)); - } - - template - SparseDataVector::SparseDataVector(std::initializer_list list) - { - AppendElements(std::move(list)); - } - - template - SparseDataVector::SparseDataVector(std::vector vec) - { - AppendElements(std::move(vec)); - } - - template - SparseDataVector::SparseDataVector(std::vector vec) - { - AppendElements(std::move(vec)); - } - - template - void SparseDataVector::AppendElement(size_t index, double value) - { - if (value == 0) - { - return; - } - - ElementType storedValue = static_cast(value); - assert(storedValue - value <= 1.0e-6 && value - storedValue <= 1.0e-6); - - if (_indexList.Size() > 0) - { - if (index <= _indexList.Max()) - { - throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Can only append values to the end of a data vector"); - } - } - - _indexList.Append(index); - _values.push_back(storedValue); - } - - template - size_t SparseDataVector::PrefixLength() const - { - if (_indexList.Size() == 0) - { - return 0; - } - else - { - return _indexList.Max() + 1; - } - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/StlIndexValueIterator.tcc b/libraries/data/tcc/StlIndexValueIterator.tcc deleted file mode 100644 index 685c8473f..000000000 --- a/libraries/data/tcc/StlIndexValueIterator.tcc +++ /dev/null @@ -1,81 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library 
(ELL) -// File: StlIndexValueIterator.h (data) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace data -{ - template - StlIndexValueIterator::StlIndexValueIterator(const IteratorType& begin, const IteratorType& end, size_t size) : - _current(begin), - _end(end), - _size(size), - _index(0) - { - SkipZeros(); - } - - template - void StlIndexValueIterator::Next() - { - ++_current; - ++_index; - SkipZeros(); - } - - template - void StlIndexValueIterator::SkipZeros() - { - while (_current < _end && *_current == 0) - { - ++_current; - ++_index; - } - } - - template - StlIndexValueIterator::StlIndexValueIterator(const IteratorType& begin, const IteratorType& end, size_t size) : - _current(begin), - _end(end), - _size(size) - { - } - - template - void StlIndexValueIterator::Next() - { - ++_index; - if (_current < _end) - { - ++_current; - } - } - - template - IndexValue StlIndexValueIterator::Get() const - { - return _current < _end ? IndexValue{ _index, (double)*_current } : IndexValue{ _index, 0.0 }; - } - - // - // Convenience function to create iterator - // - - template - VectorIndexValueIterator MakeVectorIndexValueIterator(const std::vector& vector) - { - return VectorIndexValueIterator(vector.cbegin(), vector.cend(), vector.size()); - } - - template - VectorIndexValueIterator MakeVectorIndexValueIterator(const std::vector& vector, size_t size) - { - return VectorIndexValueIterator(vector.cbegin(), vector.cend(), size); - } -} // namespace data -} // namespace ell diff --git a/libraries/data/tcc/TextLine.tcc b/libraries/data/tcc/TextLine.tcc deleted file mode 100644 index 069699fee..000000000 --- a/libraries/data/tcc/TextLine.tcc +++ /dev/null @@ -1,42 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: TextLine.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -namespace ell -{ -namespace data -{ - template - void TextLine::ParseAdvance(ValueType& value) - { - auto result = utilities::Parse(_currentChar, value); - if (result != utilities::ParseResult::success) - { - throw utilities::DataFormatException(utilities::DataFormatErrors::badFormat, "could not parse value"); - } - } - - template - size_t TextLine::TryParse(ValueType& value) const - { - auto temp = _currentChar; - auto result = utilities::Parse(temp, value); - if (result == utilities::ParseResult::success) - { - auto stepSize = static_cast(temp - _currentChar); - return stepSize; - } - else - { - return 0; - } - } -} // namespace data -} // namespace ell \ No newline at end of file diff --git a/libraries/data/tcc/TransformedDataVector.tcc b/libraries/data/tcc/TransformedDataVector.tcc deleted file mode 100644 index 5216417f5..000000000 --- a/libraries/data/tcc/TransformedDataVector.tcc +++ /dev/null @@ -1,32 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: TransformedDataVector.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace data -{ - template - TransformedDataVector::TransformedDataVector(const DataVectorType& dataVector, TransformationType 
transformation) : - _dataVector(dataVector), - _transformation(transformation) - { - } - - template - TransformedDataVector MakeTransformedDataVector(const DataVectorType& dataVector, TransformationType transformation) - { - return TransformedDataVector(dataVector, transformation); - } - - template - void operator+=(math::RowVectorReference vector, const TransformedDataVector& transformedDataVector) - { - AddTransformedTo(transformedDataVector.GetDataVector(), vector, transformedDataVector.GetTransformation()); - } -} // namespace data -} // namespace ell \ No newline at end of file diff --git a/libraries/data/tcc/TransformingIndexValueIterator.tcc b/libraries/data/tcc/TransformingIndexValueIterator.tcc deleted file mode 100644 index 7858e1046..000000000 --- a/libraries/data/tcc/TransformingIndexValueIterator.tcc +++ /dev/null @@ -1,27 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: TransformingIndexValueIterator.tcc (data) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace data -{ - template - TransformingIndexValueIterator::TransformingIndexValueIterator(WrappedIndexValueIteratorType wrappedIterator, TransformationType transform) : - _wrappedIterator(std::move(wrappedIterator)), - _transform(std::move(transform)) - { - } - - template - IndexValue TransformingIndexValueIterator::Get() const - { - auto indexValue = _wrappedIterator.Get(); - return { indexValue.index, _transform(indexValue) }; - } -} // namespace data -} // namespace ell \ No newline at end of file diff --git a/libraries/dsp/CMakeLists.txt b/libraries/dsp/CMakeLists.txt index 1f1fa15fb..8ffa1e3fe 100644 --- a/libraries/dsp/CMakeLists.txt +++ b/libraries/dsp/CMakeLists.txt @@ -29,12 +29,6 @@ set(include include/WinogradConvolution.h ) -set(tcc - tcc/FFT.tcc - tcc/IIRFilter.tcc - tcc/WindowFunctions.tcc -) - set(py py/symbolic.py py/winograd.py @@ -42,10 +36,9 @@ set(py source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) source_group("py" FILES ${py}) -add_library(${library_name} ${src} ${include} ${tcc}) +add_library(${library_name} ${src} ${include}) target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR}) target_link_libraries(${library_name} math utilities) @@ -88,9 +81,6 @@ set(test_include test/include/WindowTest.h ) -set(test_tcc -) - set(test_py test/py/generate_test_data.py ) @@ -101,11 +91,10 @@ set(test_data source_group("src" FILES ${test_src}) source_group("include" FILES ${test_include}) -source_group("tcc" FILES ${test_tcc}) source_group("py" FILES ${test_py}) source_group("data" FILES ${test_data}) -add_executable(${test_name} ${test_src} ${test_include} ${test_tcc} ${test_data}) +add_executable(${test_name} ${test_src} ${test_include} ${test_data}) target_include_directories(${test_name} PRIVATE test/include ${ELL_LIBRARIES_DIR}) target_link_libraries(${test_name} dsp testing utilities data common) copy_shared_libraries(${test_name}) @@ -140,18 +129,14 @@ set(timing_include test/include/DSPTestUtilities.h ) -set(timing_tcc -) - set(timing_py ) source_group("src" FILES ${timing_src}) source_group("include" FILES ${timing_include}) -source_group("tcc" FILES ${timing_tcc}) source_group("py" FILES ${timing_py}) -add_executable(${timing_name} ${timing_src} ${timing_include} ${timing_tcc} ${include}) 
+add_executable(${timing_name} ${timing_src} ${timing_include} ${include})
 target_include_directories(${timing_name} PRIVATE test/include ${ELL_LIBRARIES_DIR})
 target_link_libraries(${timing_name} dsp testing utilities)
 copy_shared_libraries(${timing_name})
diff --git a/libraries/dsp/include/DCT.h b/libraries/dsp/include/DCT.h
index 374f3f96e..6ade05e20 100644
--- a/libraries/dsp/include/DCT.h
+++ b/libraries/dsp/include/DCT.h
@@ -51,4 +51,80 @@ namespace dsp
 } // namespace dsp
 } // namespace ell
 
-#include "../tcc/DCT.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace dsp
+{
+    // From wikipedia
+    // https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II
+
+    //        N-1
+    // X[k] = sum x_n*cos((pi/N) * (n + 1/2)*k)
+    //        n=0
+    //
+    // If normalized, the x_0 term gets scaled by 1/sqrt(2), and then multiply the overall result by sqrt(2/N)
+    template <typename ValueType>
+    math::RowMatrix<ValueType> GetDCTMatrix(size_t windowSize, size_t numFilters, bool normalize)
+    {
+        const auto pi = math::Constants<ValueType>::pi;
+        const auto one_sqrt2 = 1.0 / std::sqrt(2.0);
+        const auto scale = std::sqrt(2.0 / windowSize);
+        math::RowMatrix<ValueType> result(numFilters, windowSize);
+        for (size_t k = 0; k < numFilters; ++k)
+        {
+            for (size_t n = 0; n < windowSize; ++n)
+            {
+                auto x = std::cos((pi * (n + 0.5) * k) / windowSize);
+                if (normalize)
+                {
+                    if (k == 0)
+                    {
+                        x *= one_sqrt2;
+                    }
+                    x *= scale;
+                }
+                result(k, n) = static_cast<ValueType>(x);
+            }
+        }
+
+        return result;
+    }
+
+    template <typename ValueType>
+    math::ColumnVector<ValueType> DCT(math::ConstRowMatrixReference<ValueType> dctMatrix, math::ConstColumnVectorReference<ValueType> signal, bool normalize)
+    {
+        math::ColumnVector<ValueType> result(signal.Size());
+        if (normalize)
+        {
+            throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
+        }
+        else
+        {
+            math::MultiplyScaleAddUpdate(static_cast<ValueType>(1.0), dctMatrix, signal, static_cast<ValueType>(0.0), result);
+        }
+        return result;
+    }
+
+    template <typename ValueType>
+    math::ColumnVector<ValueType> DCT(math::ConstColumnVectorReference<ValueType> signal, size_t numFilters, bool normalize)
+    {
+        auto windowSize = signal.Size();
+        auto dctMatrix = GetDCTMatrix<ValueType>(windowSize, numFilters);
+        math::ColumnVector<ValueType> result(numFilters);
+        if (normalize)
+        {
+            throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
+        }
+        else
+        {
+            math::MultiplyScaleAddUpdate(static_cast<ValueType>(1.0), dctMatrix, signal, static_cast<ValueType>(0.0), result);
+        }
+        return result;
+    }
+
+} // end namespace dsp
+} // namespace ell
+
+#pragma endregion implementation
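For reference, a minimal sketch of calling the DCT helpers that now live in DCT.h. The include paths, the explicit template arguments, and passing normalize = false are assumptions for illustration, not guarantees made by this diff.

// Usage sketch (assumed paths and defaults; see note above).
#include <dsp/include/DCT.h>

#include <math/include/Vector.h>

void DctExample()
{
    // An 8-sample window; keep the first 4 DCT-II coefficients.
    ell::math::ColumnVector<double> signal({ 1, 2, 3, 4, 4, 3, 2, 1 });

    // One-shot form: builds the DCT matrix internally.
    auto coeffs = ell::dsp::DCT<double>(signal, 4, false);

    // Reusable form: precompute the matrix once, then apply it per window.
    auto dctMatrix = ell::dsp::GetDCTMatrix<double>(8, 4, false);
    auto coeffs2 = ell::dsp::DCT<double>(dctMatrix, signal, false);
}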
diff --git a/libraries/dsp/include/FFT.h b/libraries/dsp/include/FFT.h
index cdd443f99..5d7bf6339 100644
--- a/libraries/dsp/include/FFT.h
+++ b/libraries/dsp/include/FFT.h
@@ -51,4 +51,155 @@ namespace dsp
 } // namespace dsp
 } // namespace ell
 
-#include "../tcc/FFT.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace dsp
+{
+    namespace detail
+    {
+        template <typename Iterator>
+        void Deinterleave(Iterator begin, Iterator end, Iterator scratch)
+        {
+            const auto size = end - begin;
+            const auto halfN = size / 2;
+            for (int index = 0; index < halfN; ++index)
+            {
+                scratch[index] = begin[2 * index + 1];
+                begin[index] = begin[2 * index];
+            }
+            for (int index = 0; index < halfN; ++index)
+            {
+                begin[index + halfN] = scratch[index];
+            }
+        }
+
+        template <typename Iterator>
+        void FFT(Iterator begin, Iterator end, Iterator scratch, bool inverse)
+        {
+            if (inverse)
+            {
+                throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
+            }
+            using ValueType = typename Iterator::value_type::value_type;
+            const ValueType pi = math::Constants<ValueType>::pi;
+
+            auto halfN = (end - begin) / 2;
+            if (halfN < 1)
+            {
+                return; // done
+            }
+
+            Deinterleave(begin, end, scratch);
+            auto evens = begin;
+            auto odds = begin + halfN;
+
+            if (halfN > 1)
+            {
+                FFT(evens, evens + halfN, scratch, inverse);
+                FFT(odds, odds + halfN, scratch, inverse);
+            }
+
+            for (int k = 0; k < halfN; k++)
+            {
+                // w = e^(2*pi*k/N)
+                std::complex<ValueType> w = std::exp(std::complex<ValueType>(0, pi * k / halfN));
+                auto e = evens[k];
+                auto o = odds[k];
+                auto wo = w * o;
+                evens[k] = e + wo; // even
+                odds[k] = e - wo; // odd
+            }
+        }
+
+        template <typename Iterator, typename ComplexIterator>
+        void FFTReal(Iterator begin, Iterator end, Iterator scratch, ComplexIterator outputBegin, ComplexIterator outputEnd, bool inverse)
+        {
+            UNUSED(outputEnd);
+            if (inverse)
+            {
+                throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "inverse must be false");
+            }
+
+            using ValueType = typename Iterator::value_type;
+            const ValueType pi = math::Constants<ValueType>::pi;
+
+            auto halfN = (end - begin) / 2;
+            if (halfN < 1)
+            {
+                return; // done
+            }
+
+            Deinterleave(begin, end, scratch);
+
+            auto evens = begin;
+            auto odds = begin + halfN;
+            auto complexEvens = outputBegin;
+            auto complexOdds = outputBegin + halfN;
+
+            if (halfN > 1)
+            {
+                FFTReal(evens, evens + halfN, scratch, complexEvens, complexEvens + halfN, inverse);
+                FFTReal(odds, odds + halfN, scratch, complexOdds, complexOdds + halfN, inverse);
+            }
+            else
+            {
+                // Base case: copy from real to complex
+                for (int index = 0; index < halfN; ++index)
+                {
+                    complexEvens[index] = evens[index];
+                    complexOdds[index] = odds[index];
+                }
+            }
+
+            for (int k = 0; k < halfN; k++)
+            {
+                // w = e^(2*pi*k/N)
+                std::complex<ValueType> w = std::exp(std::complex<ValueType>(0, pi * k / halfN));
+                auto e = complexEvens[k];
+                auto o = complexOdds[k];
+                auto wo = w * o;
+                complexEvens[k] = e + wo; // even
+                complexOdds[k] = e - wo; // odd
+            }
+        }
+    } // namespace detail
+
+    template <typename ValueType>
+    void FFT(std::vector<std::complex<ValueType>>& input, bool inverse)
+    {
+        std::vector<std::complex<ValueType>> scratch(input.size() / 2);
+        detail::FFT(std::begin(input), std::end(input), std::begin(scratch), inverse);
+    }
+
+    template <typename ValueType>
+    void FFT(std::vector<ValueType>& input, bool inverse)
+    {
+        auto size = input.size();
+        std::vector<ValueType> scratch(size / 2);
+        std::vector<std::complex<ValueType>> output(size);
+        detail::FFTReal(std::begin(input), std::end(input), std::begin(scratch), std::begin(output), std::end(output), inverse);
+        for (size_t index = 0; index < size; ++index)
+        {
+            input[index] = std::abs(output[index]);
+        }
+    }
+
+    template <typename ValueType>
+    void FFT(math::RowVector<ValueType>& input, bool inverse)
+    {
+        using std::begin;
+        auto size = input.Size();
+        math::RowVector<ValueType> scratch(size / 2);
+        std::vector<std::complex<ValueType>> output(size);
+        detail::FFTReal(begin(input), end(input), begin(scratch), begin(output), end(output), inverse);
+        for (size_t index = 0; index < size; ++index)
+        {
+            input[index] = std::abs(output[index]);
+        }
+    }
+} // namespace dsp
+} // namespace ell
+
+#pragma endregion implementation
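A usage sketch for the relocated FFT overloads. Note that the recursion halves the range at each level, so a power-of-two input length is effectively assumed, and the real-input overload overwrites the signal with per-bin magnitudes (the std::abs loop above). The include path is an assumption.

// Usage sketch (assumed include path; inverse transforms throw above).
#include <dsp/include/FFT.h>

#include <complex>
#include <vector>

void FftExample()
{
    // Real input: transformed in place, becomes the magnitude spectrum.
    std::vector<float> samples(256, 0.0f);
    samples[0] = 1.0f; // unit impulse -> flat spectrum
    ell::dsp::FFT(samples, false);

    // Complex input: transformed in place, no magnitude step.
    std::vector<std::complex<float>> spectrum(256, { 0.0f, 0.0f });
    ell::dsp::FFT(spectrum, false);
}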
diff --git a/libraries/dsp/include/IIRFilter.h b/libraries/dsp/include/IIRFilter.h
index c7325274c..10de45f16 100644
--- a/libraries/dsp/include/IIRFilter.h
+++ b/libraries/dsp/include/IIRFilter.h
@@ -88,4 +88,76 @@ namespace dsp
 } // namespace dsp
 } // namespace ell
 
-#include "../tcc/IIRFilter.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace dsp
+{
+    template <typename ValueType>
+    IIRFilter<ValueType>::IIRFilter(std::vector<ValueType> b, std::vector<ValueType> a) :
+        _previousInput(b.size()),
+        _previousOutput(a.size()),
+        _b(b),
+        _a(a)
+    {
+        Reset();
+    }
+
+    template <typename ValueType>
+    ValueType IIRFilter<ValueType>::FilterSample(ValueType x)
+    {
+        _previousInput.Append(x);
+        ValueType output = 0;
+        assert(_b.size() == _previousInput.Size());
+        assert(_a.size() == _previousOutput.Size());
+        for (size_t index = 0; index < _b.size(); index++)
+        {
+            output += _b[index] * _previousInput[static_cast<int>(index)];
+        }
+
+        for (size_t index = 0; index < _a.size(); index++)
+        {
+            output -= _a[index] * _previousOutput[static_cast<int>(index)];
+        }
+
+        _previousOutput.Append(output);
+        return output;
+    }
+
+    template <typename ValueType>
+    std::vector<ValueType> IIRFilter<ValueType>::FilterSamples(const std::vector<ValueType>& x)
+    {
+        std::vector<ValueType> result(x.size());
+        std::transform(x.begin(), x.end(), result.begin(), [this](ValueType elem) {
+            return FilterSample(elem);
+        });
+        return result;
+    }
+
+    template <typename ValueType>
+    void IIRFilter<ValueType>::Reset()
+    {
+        _previousInput.Fill(0);
+        _previousOutput.Fill(0);
+    }
+
+    template <typename ValueType>
+    void IIRFilter<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        archiver["b"] << _b;
+        archiver["a"] << _a;
+    }
+
+    template <typename ValueType>
+    void IIRFilter<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        archiver["b"] >> _b;
+        archiver["a"] >> _a;
+        _previousInput.Resize(_b.size());
+        _previousOutput.Resize(_a.size());
+    }
+} // namespace dsp
+} // namespace ell
+
+#pragma endregion implementation
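A sketch of the filter's sign convention, which is easy to misread: FilterSample adds the b (feed-forward) terms and subtracts the a (feedback) terms, so a simple smoother y[n] = 0.1*x[n] + 0.9*y[n-1] takes a = { -0.9 }. The include path is an assumption.

// Usage sketch (assumed include path; see note above).
#include <dsp/include/IIRFilter.h>

#include <vector>

void IirExample()
{
    // One-pole low-pass smoother: y[n] = 0.1 * x[n] + 0.9 * y[n-1]
    ell::dsp::IIRFilter<float> smoother({ 0.1f }, { -0.9f });

    std::vector<float> noisy = { 1.0f, 0.0f, 1.0f, 0.0f, 1.0f };
    auto smoothed = smoother.FilterSamples(noisy);

    smoother.Reset(); // clear history before filtering an unrelated stream
}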
diff --git a/libraries/dsp/include/WindowFunctions.h b/libraries/dsp/include/WindowFunctions.h
index 087b16f7b..94f45abac 100644
--- a/libraries/dsp/include/WindowFunctions.h
+++ b/libraries/dsp/include/WindowFunctions.h
@@ -53,4 +53,72 @@ namespace dsp
 } // namespace dsp
 } // namespace ell
 
-#include "../tcc/WindowFunctions.tcc"
+#pragma region implementation
+
+#include <cmath>
+
+#include <vector>
+
+namespace ell
+{
+namespace dsp
+{
+    // Generalized cosine window
+    template <typename ValueType>
+    std::vector<ValueType> GeneralizedCosineWindow(size_t size, const std::vector<double>& coefficients, WindowSymmetry symmetry)
+    {
+        // Formula for a generalized cosine window of length N with K coefficients:
+        //
+        //        K-1
+        // Y[n] = sum( (-1)^k * a_k * cos(2 pi k n / D) )
+        //        k=0
+        //
+        // where D == N-1 for the symmetric case, and D == N for the periodic case
+        //
+        // https://en.wikipedia.org/wiki/Window_function#Cosine-sum_windows
+
+        const auto pi = math::Constants<double>::pi;
+        const auto numCoeffs = coefficients.size();
+        auto denom = static_cast<double>(symmetry == WindowSymmetry::symmetric ? size - 1 : size);
+        std::vector<ValueType> result(size);
+        for (size_t index = 0; index < size; index++)
+        {
+            double windowVal = coefficients[0];
+            double sign = -1.0;
+            for (size_t coeffIndex = 1; coeffIndex < numCoeffs; ++coeffIndex)
+            {
+                windowVal += sign * coefficients[coeffIndex] * std::cos((2 * pi * coeffIndex * index) / denom);
+                sign *= -1;
+            }
+            result[index] = static_cast<ValueType>(windowVal);
+        }
+        return result;
+    }
+
+    // Hamming window
+    template <typename ValueType>
+    std::vector<ValueType> HammingWindow(size_t size, WindowSymmetry symmetry)
+    {
+        // The original alpha and beta values for the Hamming window are alpha = 0.54, beta = 1 - alpha = 0.46
+        // These values are an approximation to alpha = 25/46, beta = 1 - alpha = 21/46 (which cancels the first sidelobe), but have better equiripple properties
+        // However, ideal equiripple coefficients are: alpha = 0.53836, beta = 1 - alpha = 0.46164
+        // For compatibility with existing DSP libraries, we use the values 0.54 and 0.46
+        // https://en.wikipedia.org/wiki/Window_function#Hamming_window
+
+        // const double alpha = 0.53836;
+        const double alpha = 0.54;
+        const double beta = 1.0 - alpha;
+        return GeneralizedCosineWindow<ValueType>(size, { alpha, beta }, symmetry);
+    }
+
+    // Hann window
+    template <typename ValueType>
+    std::vector<ValueType> HannWindow(size_t size, WindowSymmetry symmetry)
+    {
+        // https://en.wikipedia.org/wiki/Window_function#Hann_window
+        return GeneralizedCosineWindow<ValueType>(size, { 0.5, 0.5 }, symmetry);
+    }
+} // namespace dsp
+} // namespace ell
+
+#pragma endregion implementation
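A sketch of the window helpers above; 'symmetric' is the conventional choice for filter design and 'periodic' for spectral analysis. The include path and element type are assumptions.

// Usage sketch (assumed include path; see note above).
#include <dsp/include/WindowFunctions.h>

#include <vector>

void WindowExample()
{
    auto hamming = ell::dsp::HammingWindow<float>(256, ell::dsp::WindowSymmetry::periodic);

    // Typical use: taper a frame before an FFT to reduce spectral leakage.
    std::vector<float> frame(256, 1.0f);
    for (size_t i = 0; i < frame.size(); ++i)
    {
        frame[i] *= hamming[i];
    }
}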
diff --git a/libraries/dsp/tcc/DCT.tcc b/libraries/dsp/tcc/DCT.tcc
deleted file mode 100644
index 2c8b737f0..000000000
--- a/libraries/dsp/tcc/DCT.tcc
+++ /dev/null
@@ -1,81 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     DCT.tcc (dsp)
-//  Authors:  Chuck Jacobs
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace ell
-{
-namespace dsp
-{
-    // From wikipedia
-    // https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II
-
-    //        N-1
-    // X[k] = sum x_n*cos((pi/N) * (n + 1/2)*k)
-    //        n=0
-    //
-    // If normalized, the x_0 term gets scaled by 1/sqrt(2), and then multiply the overall result by sqrt(2/N)
-    template <typename ValueType>
-    math::RowMatrix<ValueType> GetDCTMatrix(size_t windowSize, size_t numFilters, bool normalize)
-    {
-        const auto pi = math::Constants<ValueType>::pi;
-        const auto one_sqrt2 = 1.0 / std::sqrt(2.0);
-        const auto scale = std::sqrt(2.0 / windowSize);
-        math::RowMatrix<ValueType> result(numFilters, windowSize);
-        for (size_t k = 0; k < numFilters; ++k)
-        {
-            for (size_t n = 0; n < windowSize; ++n)
-            {
-                auto x = std::cos((pi * (n + 0.5) * k) / windowSize);
-                if (normalize)
-                {
-                    if (k == 0)
-                    {
-                        x *= one_sqrt2;
-                    }
-                    x *= scale;
-                }
-                result(k, n) = static_cast<ValueType>(x);
-            }
-        }
-
-        return result;
-    }
-
-    template <typename ValueType>
-    math::ColumnVector<ValueType> DCT(math::ConstRowMatrixReference<ValueType> dctMatrix, math::ConstColumnVectorReference<ValueType> signal, bool normalize)
-    {
-        math::ColumnVector<ValueType> result(signal.Size());
-        if (normalize)
-        {
-            throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
-        }
-        else
-        {
-            math::MultiplyScaleAddUpdate(static_cast<ValueType>(1.0), dctMatrix, signal, static_cast<ValueType>(0.0), result);
-        }
-        return result;
-    }
-
-    template <typename ValueType>
-    math::ColumnVector<ValueType> DCT(math::ConstColumnVectorReference<ValueType> signal, size_t numFilters, bool normalize)
-    {
-        auto windowSize = signal.Size();
-        auto dctMatrix = GetDCTMatrix<ValueType>(windowSize, numFilters);
-        math::ColumnVector<ValueType> result(numFilters);
-        if (normalize)
-        {
-            throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
-        }
-        else
-        {
-            math::MultiplyScaleAddUpdate(static_cast<ValueType>(1.0), dctMatrix, signal, static_cast<ValueType>(0.0), result);
-        }
-        return result;
-    }
-
-} // end namespace dsp
-} // namespace ell
diff --git a/libraries/dsp/tcc/FFT.tcc b/libraries/dsp/tcc/FFT.tcc
deleted file mode 100644
index 74c20820b..000000000
--- a/libraries/dsp/tcc/FFT.tcc
+++ /dev/null
@@ -1,156 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     FFT.tcc (dsp)
-//  Authors:  James Devine, Chuck Jacobs
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace ell
-{
-namespace dsp
-{
-    namespace detail
-    {
-        template <typename Iterator>
-        void Deinterleave(Iterator begin, Iterator end, Iterator scratch)
-        {
-            const auto size = end - begin;
-            const auto halfN = size / 2;
-            for (int index = 0; index < halfN; ++index)
-            {
-                scratch[index] = begin[2 * index + 1];
-                begin[index] = begin[2 * index];
-            }
-            for (int index = 0; index < halfN; ++index)
-            {
-                begin[index + halfN] = scratch[index];
-            }
-        }
-
-        template <typename Iterator>
-        void FFT(Iterator begin, Iterator end, Iterator scratch, bool inverse)
-        {
-            if (inverse)
-            {
-                throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
-            }
-            using ValueType = typename Iterator::value_type::value_type;
-            const ValueType pi = math::Constants<ValueType>::pi;
-
-            auto halfN = (end - begin) / 2;
-            if (halfN < 1)
-            {
-                return; // done
-            }
-
-            Deinterleave(begin, end, scratch);
-            auto evens = begin;
-            auto odds = begin + halfN;
-
-            if (halfN > 1)
-            {
-                FFT(evens, evens + halfN, scratch, inverse);
-                FFT(odds, odds + halfN, scratch, inverse);
-            }
-
-            for (int k = 0; k < halfN; k++)
-            {
-                // w = e^(2*pi*k/N)
-                std::complex<ValueType> w = std::exp(std::complex<ValueType>(0, pi * k / halfN));
-                auto e = evens[k];
-                auto o = odds[k];
-                auto wo = w * o;
-                evens[k] = e + wo; // even
-                odds[k] = e - wo; // odd
-            }
-        }
-
-        template <typename Iterator, typename ComplexIterator>
-        void FFTReal(Iterator begin, Iterator end, Iterator scratch, ComplexIterator outputBegin, ComplexIterator outputEnd, bool inverse)
-        {
-            UNUSED(outputEnd);
-            if (inverse)
-            {
-                throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "inverse must be false");
-            }
-
-            using ValueType = typename Iterator::value_type;
-            const ValueType pi = math::Constants<ValueType>::pi;
-
-            auto halfN = (end - begin) / 2;
-            if (halfN < 1)
-            {
-                return; // done
-            }
-
-            Deinterleave(begin, end, scratch);
-
-            auto evens = begin;
-            auto odds = begin + halfN;
-            auto complexEvens = outputBegin;
-            auto complexOdds = outputBegin + halfN;
-
-            if (halfN > 1)
-            {
-                FFTReal(evens, evens + halfN, scratch, complexEvens, complexEvens + halfN, inverse);
-                FFTReal(odds, odds + halfN, scratch, complexOdds, complexOdds + halfN, inverse);
-            }
-            else
-            {
-                // Base case: copy from real to complex
-                for (int index = 0; index < halfN; ++index)
-                {
-                    complexEvens[index] = evens[index];
-                    complexOdds[index] = odds[index];
-                }
-            }
-
-            for (int k = 0; k < halfN; k++)
-            {
-                // w = e^(2*pi*k/N)
-                std::complex<ValueType> w = std::exp(std::complex<ValueType>(0, pi * k / halfN));
-                auto e = complexEvens[k];
-                auto o = complexOdds[k];
-                auto wo = w * o;
-                complexEvens[k] = e + wo; // even
-                complexOdds[k] = e - wo; // odd
-            }
-        }
-    } // namespace detail
-
-    template <typename ValueType>
-    void FFT(std::vector<std::complex<ValueType>>& input, bool inverse)
-    {
-        std::vector<std::complex<ValueType>> scratch(input.size() / 2);
-        detail::FFT(std::begin(input), std::end(input), 
std::begin(scratch), inverse); - } - - template - void FFT(std::vector& input, bool inverse) - { - auto size = input.size(); - std::vector scratch(size / 2); - std::vector> output(size); - detail::FFTReal(std::begin(input), std::end(input), std::begin(scratch), std::begin(output), std::end(output), inverse); - for (size_t index = 0; index < size; ++index) - { - input[index] = std::abs(output[index]); - } - } - - template - void FFT(math::RowVector& input, bool inverse) - { - using std::begin; - auto size = input.Size(); - math::RowVector scratch(size / 2); - std::vector> output(size); - detail::FFTReal(begin(input), end(input), begin(scratch), begin(output), end(output), inverse); - for (size_t index = 0; index < size; ++index) - { - input[index] = std::abs(output[index]); - } - } -} // namespace dsp -} // namespace ell diff --git a/libraries/dsp/tcc/IIRFilter.tcc b/libraries/dsp/tcc/IIRFilter.tcc deleted file mode 100644 index f7ebbd6ea..000000000 --- a/libraries/dsp/tcc/IIRFilter.tcc +++ /dev/null @@ -1,77 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IIRFilter.tcc (dsp) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace dsp -{ - template - IIRFilter::IIRFilter(std::vector b, std::vector a) : - _previousInput(b.size()), - _previousOutput(a.size()), - _b(b), - _a(a) - { - Reset(); - } - - template - ValueType IIRFilter::FilterSample(ValueType x) - { - _previousInput.Append(x); - ValueType output = 0; - assert(_b.size() == _previousInput.Size()); - assert(_a.size() == _previousOutput.Size()); - for (size_t index = 0; index < _b.size(); index++) - { - output += _b[index] * _previousInput[static_cast(index)]; - } - - for (size_t index = 0; index < _a.size(); index++) - { - output -= _a[index] * _previousOutput[static_cast(index)]; - } - - _previousOutput.Append(output); - return output; - } - - template - std::vector IIRFilter::FilterSamples(const std::vector& x) - { - std::vector result(x.size()); - std::transform(x.begin(), x.end(), result.begin(), [this](ValueType elem) { - return FilterSample(elem); - }); - return result; - } - - template - void IIRFilter::Reset() - { - _previousInput.Fill(0); - _previousOutput.Fill(0); - } - - template - void IIRFilter::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["b"] << _b; - archiver["a"] << _a; - } - - template - void IIRFilter::ReadFromArchive(utilities::Unarchiver& archiver) - { - archiver["b"] >> _b; - archiver["a"] >> _a; - _previousInput.Resize(_b.size()); - _previousOutput.Resize(_a.size()); - } -} // namespace dsp -} // namespace ell diff --git a/libraries/dsp/tcc/WindowFunctions.tcc b/libraries/dsp/tcc/WindowFunctions.tcc deleted file mode 100644 index f0513d781..000000000 --- a/libraries/dsp/tcc/WindowFunctions.tcc +++ /dev/null @@ -1,73 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: WindowFunctions.tcc (dsp) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -#include - -namespace ell -{ -namespace dsp -{ - // Generalized cosine window - template - std::vector GeneralizedCosineWindow(size_t size, const std::vector& coefficients, WindowSymmetry symmetry) - { - // Formula for 
diff --git a/libraries/dsp/tcc/WindowFunctions.tcc b/libraries/dsp/tcc/WindowFunctions.tcc
deleted file mode 100644
index f0513d781..000000000
--- a/libraries/dsp/tcc/WindowFunctions.tcc
+++ /dev/null
@@ -1,73 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     WindowFunctions.tcc (dsp)
-//  Authors:  Chuck Jacobs
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <cmath>
-
-#include <vector>
-
-namespace ell
-{
-namespace dsp
-{
-    // Generalized cosine window
-    template <typename ValueType>
-    std::vector<ValueType> GeneralizedCosineWindow(size_t size, const std::vector<double>& coefficients, WindowSymmetry symmetry)
-    {
-        // Formula for generalized cosine window:
-        //
-        //        N-1
-        // Y[i] = sum( (-1 ^ k) * a_k * cos(2 pi k n / D) )
-        //        n=0
-        //
-        // where D == N-1 for the symmetric case, and D == N for the periodic case
-        //
-        // https://en.wikipedia.org/wiki/Window_function#Cosine-sum_windows
-
-        const auto pi = math::Constants<double>::pi;
-        const auto numCoeffs = coefficients.size();
-        auto denom = static_cast<double>(symmetry == WindowSymmetry::symmetric ? size - 1 : size);
-        std::vector<ValueType> result(size);
-        for (size_t index = 0; index < size; index++)
-        {
-            double windowVal = coefficients[0];
-            double sign = -1.0;
-            for (size_t coeffIndex = 1; coeffIndex < numCoeffs; ++coeffIndex)
-            {
-                windowVal += sign * coefficients[coeffIndex] * std::cos((2 * pi * coeffIndex * index) / denom);
-                sign *= -1;
-            }
-            result[index] = static_cast<ValueType>(windowVal);
-        }
-        return result;
-    }
-
-    // Hamming window
-    template <typename ValueType>
-    std::vector<ValueType> HammingWindow(size_t size, WindowSymmetry symmetry)
-    {
-        // The original alpha and beta values for the Hamming window are alpha = 0.54, beta = 1 - alpha = 0.46
-        // These values are an approximation to alpha = 25/46, beta = 1 - alpha = 21/46 (which cancels the first sidelobe), but have better equiripple properties
-        // However, ideal equiripple coefficients are: alpha = 0.53836, beta = 1 - alpha = 0.46164
-        // For compatibility with existing DSP libraries, we use the values 0.54 and 0.46
-        // https://en.wikipedia.org/wiki/Window_function#Hamming_window
-
-        // const double alpha = 0.53836;
-        const double alpha = 0.54;
-        const double beta = 1.0 - alpha;
-        return GeneralizedCosineWindow<ValueType>(size, { alpha, beta }, symmetry);
-    }
-
-    // Hann window
-    template <typename ValueType>
-    std::vector<ValueType> HannWindow(size_t size, WindowSymmetry symmetry)
-    {
-        // https://en.wikipedia.org/wiki/Window_function#Hann_window
-        return GeneralizedCosineWindow<ValueType>(size, { 0.5, 0.5 }, symmetry);
-    }
-} // namespace dsp
-} // namespace ell
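
[Editor's note: quick numeric check of the 0.54/0.46 coefficients above. For a symmetric window the evaluation is w[n] = alpha - beta*cos(2 pi n / (N-1)), giving 0.08 at the endpoints and 1.0 at the center. Illustrative standalone snippet, not library code.]

    #include <cmath>
    #include <cstdio>

    // Symmetric Hamming window, N = 5, so D = N-1 = 4.
    int main()
    {
        const double pi = 3.14159265358979323846;
        const double alpha = 0.54, beta = 0.46;
        for (int n = 0; n < 5; ++n)
        {
            double w = alpha - beta * std::cos(2 * pi * n / 4.0);
            std::printf("w[%d] = %f\n", n, w); // w[0] = 0.08, w[2] = 1.0, w[4] = 0.08
        }
    }
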
diff --git a/libraries/emitters/CMakeLists.txt b/libraries/emitters/CMakeLists.txt
index a23042ab8..cbad5ea49 100644
--- a/libraries/emitters/CMakeLists.txt
+++ b/libraries/emitters/CMakeLists.txt
@@ -86,21 +86,6 @@ set (include
     include/VectorVariable.h
 )
 
-set (tcc
-    tcc/CompilableIRFunction.tcc
-    tcc/EmitterTypes.tcc
-    tcc/IREmitter.tcc
-    tcc/IRFunctionEmitter.tcc
-    tcc/IRLocalScalar.tcc
-    tcc/IRModuleEmitter.tcc
-    tcc/IRRuntime.tcc
-    tcc/IRVectorUtilities.tcc
-    tcc/ScalarVariable.tcc
-    tcc/SymbolTable.tcc
-    tcc/VectorVariable.tcc
-    tcc/Variable.tcc
-)
-
 set (templates
     templates/CppPredictWrapper.in
     templates/SwigModule.in
@@ -110,10 +95,9 @@ set (templates
 
 source_group("src" FILES ${src})
 source_group("include" FILES ${include})
-source_group("tcc" FILES ${tcc})
 source_group("templates" FILES ${templates})
 
-add_library(${library_name} ${src} ${include} ${tcc} ${templates})
+add_library(${library_name} ${src} ${include} ${templates})
 target_include_directories(${library_name} PRIVATE include templates ${ELL_LIBRARIES_DIR})
 target_include_directories(${library_name} SYSTEM PUBLIC ${LLVM_INCLUDE_DIRS})
 target_link_libraries(${library_name} math utilities ${LLVM_LIBS})
diff --git a/libraries/emitters/include/CompilableIRFunction.h b/libraries/emitters/include/CompilableIRFunction.h
index 0095725c5..fb79f03f2 100644
--- a/libraries/emitters/include/CompilableIRFunction.h
+++ b/libraries/emitters/include/CompilableIRFunction.h
@@ -77,4 +77,25 @@ namespace emitters
 } // namespace emitters
 } // namespace ell
 
-#include "../tcc/CompilableIRFunction.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace emitters
+{
+    template <typename ValueType>
+    ValueType IRAddFunction<ValueType>::Compute(ValueType x, ValueType y) const
+    {
+        return x + y;
+    }
+
+    template <typename ValueType>
+    LLVMValue IRAddFunction<ValueType>::Compile(IRFunctionEmitter& function, LLVMValue x, LLVMValue y) const
+    {
+        LLVMValue sum = function.Operator(emitters::GetAddForValueType<ValueType>(), x, y);
+        return sum;
+    }
+} // namespace emitters
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/emitters/include/EmitterTypes.h b/libraries/emitters/include/EmitterTypes.h
index 29759a7f3..a9ec757c9 100644
--- a/libraries/emitters/include/EmitterTypes.h
+++ b/libraries/emitters/include/EmitterTypes.h
@@ -327,4 +327,37 @@ namespace emitters
 } // namespace emitters
 } // namespace ell
 
-#include "../tcc/EmitterTypes.tcc"
\ No newline at end of file
+#pragma region implementation
+
+namespace ell
+{
+namespace emitters
+{
+    template <typename ValueType>
+    std::vector<typename VariableValueType<ValueType>::DestType> VariableValueType<ValueType>::ToVariableVector(const std::vector<ValueType>& src)
+    {
+        return src;
+    }
+
+    template <typename ValueType>
+    std::vector<ValueType> VariableValueType<ValueType>::FromVariableVector(const std::vector<typename VariableValueType<ValueType>::DestType>& src)
+    {
+        return src;
+    }
+
+    // bool specialization
+    inline std::vector<VariableValueType<bool>::DestType> VariableValueType<bool>::ToVariableVector(const std::vector<bool>& src)
+    {
+        std::vector<VariableValueType<bool>::DestType> result(src.begin(), src.end());
+        return result;
+    }
+
+    inline std::vector<bool> VariableValueType<bool>::FromVariableVector(const std::vector<VariableValueType<bool>::DestType>& src)
+    {
+        std::vector<bool> result(src.begin(), src.end());
+        return result;
+    }
+} // namespace emitters
+} // namespace ell
+
+#pragma endregion implementation
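
[Editor's note: context for the bool specialization above. std::vector<bool> is bit-packed and cannot expose a contiguous element buffer, so boolean data is widened to one byte per element before being handed to emitted code. A standalone sketch of the same round-trip, with int8_t standing in for VariableValueType<bool>::DestType (an assumption made for illustration):]

    #include <cstdint>
    #include <vector>

    // Widen packed bits to a byte-per-element vector with ordinary contiguous storage.
    std::vector<int8_t> WidenBools(const std::vector<bool>& src)
    {
        return std::vector<int8_t>(src.begin(), src.end());
    }

    // Narrow back: any nonzero byte becomes true.
    std::vector<bool> NarrowBools(const std::vector<int8_t>& src)
    {
        return std::vector<bool>(src.begin(), src.end());
    }
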
diff --git a/libraries/emitters/include/IREmitter.h b/libraries/emitters/include/IREmitter.h
index 7c96b97bf..76b215a16 100644
--- a/libraries/emitters/include/IREmitter.h
+++ b/libraries/emitters/include/IREmitter.h
@@ -1117,4 +1117,44 @@ namespace emitters
 } // namespace emitters
 } // namespace ell
 
-#include "../tcc/IREmitter.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace emitters
+{
+    template <typename ValueType>
+    LLVMValue IREmitter::CastValue(LLVMValue pValue)
+    {
+        auto outputType = GetVariableType<ValueType>();
+        return CastValue(pValue, outputType);
+    }
+
+    template <typename ValueType>
+    LLVMValue IREmitter::CastUnsignedValue(LLVMValue pValue)
+    {
+        auto outputType = GetVariableType<ValueType>();
+        return CastUnsignedValue(pValue, outputType);
+    }
+
+    template <typename ValueType>
+    llvm::Constant* IREmitter::Pointer(ValueType* ptr)
+    {
+        auto ptrValue = Literal(reinterpret_cast<int64_t>(ptr));
+        auto ptrType = PointerType(GetVariableType<ValueType>());
+        return llvm::ConstantExpr::getIntToPtr(ptrValue, ptrType);
+    }
+
+    template <typename ListType>
+    void IREmitter::BindArgumentNames(LLVMFunction pFunction, const ListType& arguments)
+    {
+        size_t i = 0;
+        for (auto& argument : pFunction->args())
+        {
+            argument.setName(arguments[i++].first);
+        }
+    }
+} // namespace emitters
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/emitters/include/IRFunctionEmitter.h b/libraries/emitters/include/IRFunctionEmitter.h
index 11608cc63..162a49ff4 100644
--- a/libraries/emitters/include/IRFunctionEmitter.h
+++ b/libraries/emitters/include/IRFunctionEmitter.h
@@ -1747,4 +1747,173 @@ namespace emitters
 } // namespace emitters
 } // namespace ell
 
-#include "../tcc/IRFunctionEmitter.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace emitters
+{
+    template <typename ValueType, utilities::IsFundamental<ValueType>>
+    IRLocalScalar IRFunctionEmitter::LocalScalar(ValueType value)
+    {
+        return IRLocalScalar(*this, Literal(value));
+    }
+
+    template <typename ValueType>
+    LLVMValue IRFunctionEmitter::Literal(ValueType value)
+    {
+        return _pEmitter->Literal(value);
+    }
+
+    template <typename ValueType>
+    LLVMValue IRFunctionEmitter::Pointer(ValueType* value)
+    {
+        return _pEmitter->Pointer(value);
+    }
+
+    template <typename ValueType>
+    LLVMValue IRFunctionEmitter::CastValue(LLVMValue pValue)
+    {
+        return _pEmitter->CastValue<ValueType>(pValue);
+    }
+
+    template <typename ValueType>
+    LLVMValue IRFunctionEmitter::CastUnsignedValue(LLVMValue pValue)
+    {
+        return _pEmitter->CastUnsignedValue<ValueType>(pValue);
+    }
+
+    template <typename ValueType>
+    void IRFunctionEmitter::VectorOperator(TypedOperator type, size_t size, ValueType leftValue, LLVMValue pRightValue, std::function<void(LLVMValue, LLVMValue)> aggregator)
+    {
+        assert(pRightValue != nullptr);
+
+        LLVMValue pLeftItem = Literal(leftValue);
+        For(size, [pLeftItem, pRightValue, type, aggregator](IRFunctionEmitter& fn, LLVMValue i) {
+            LLVMValue pRightItem = fn.ValueAt(pRightValue, i);
+            LLVMValue pTemp = fn.Operator(type, pLeftItem, pRightItem);
+            aggregator(i, pTemp);
+        });
+    }
+
+    template <typename ValueType>
+    void IRFunctionEmitter::VectorOperator(TypedOperator type, size_t size, LLVMValue pLeftValue, ValueType rightValue, std::function<void(LLVMValue, LLVMValue)> aggregator)
+    {
+        assert(pLeftValue != nullptr);
+
+        LLVMValue pRightItem = Literal(rightValue);
+        For(size, [pLeftValue, pRightItem, type, aggregator](IRFunctionEmitter& fn, LLVMValue i) {
+            LLVMValue pLeftItem = fn.ValueAt(pLeftValue, i);
+            LLVMValue pTemp = fn.Operator(type, pLeftItem, pRightItem);
+            aggregator(i, pTemp);
+        });
+    }
+
+    template <typename ValueType>
+    LLVMValue IRFunctionEmitter::Malloc(int64_t size)
+    {
+        return Malloc(GetVariableType<ValueType>(), size);
+    }
+
+    template <typename ValueType>
+    void IRFunctionEmitter::MemoryMove(LLVMValue pPointer, int sourceOffset, int destinationOffset, int count)
+    {
+        assert(pPointer != nullptr);
+        auto pSource = PointerOffset(pPointer, Literal(sourceOffset));
+        auto pDestination = PointerOffset(pPointer, Literal(destinationOffset));
+        int byteCount = count * sizeof(ValueType);
+        _pEmitter->MemoryMove(pSource, pDestination, Literal(byteCount));
+    }
+
+    template <typename ValueType>
+    void IRFunctionEmitter::MemoryCopy(LLVMValue pSourcePointer, LLVMValue pDestinationPointer, int count)
+    {
+        auto pSource = PointerOffset(pSourcePointer, 0);
+        auto pDestination = PointerOffset(pDestinationPointer, 0);
+        auto byteCount = count * sizeof(ValueType);
+        _pEmitter->MemoryCopy(pSource, pDestination, Literal(byteCount));
+    }
+
+    template <typename ValueType>
+    void IRFunctionEmitter::MemoryCopy(LLVMValue pSourcePointer, LLVMValue pDestinationPointer, LLVMValue count)
+    {
+        auto pSource = PointerOffset(pSourcePointer, 0);
+        auto pDestination = PointerOffset(pDestinationPointer, 0);
+        auto byteCount = Operator(emitters::TypedOperator::multiply, count, Literal(sizeof(ValueType)));
+        _pEmitter->MemoryCopy(pSource, pDestination, byteCount);
+    }
+
+    template <typename ValueType>
+    void IRFunctionEmitter::MemoryCopy(LLVMValue pSourcePointer, int sourceOffset, LLVMValue pDestinationPointer, int destinationOffset, int count)
+    {
+        auto pSource = PointerOffset(pSourcePointer, Literal(sourceOffset));
+        auto pDestination = PointerOffset(pDestinationPointer, Literal(destinationOffset));
+        int byteCount = count * sizeof(ValueType);
+        _pEmitter->MemoryCopy(pSource, pDestination, Literal(byteCount));
+    }
+
+    template <typename ValueType>
+    void IRFunctionEmitter::MemoryCopy(LLVMValue pSourcePointer, LLVMValue sourceOffset, LLVMValue pDestinationPointer, LLVMValue destinationOffset, LLVMValue count)
+    {
+        auto pSource = PointerOffset(pSourcePointer, sourceOffset);
+        auto pDestination = PointerOffset(pDestinationPointer, destinationOffset);
+        auto byteCount = Operator(emitters::TypedOperator::multiply, count, Literal(sizeof(ValueType)));
+        _pEmitter->MemoryCopy(pSource, pDestination, byteCount);
+    }
+
+    template <typename ValueType>
+    void IRFunctionEmitter::MemorySet(LLVMValue pDestinationPointer, int destinationOffset, 
LLVMValue value, int count) + { + auto pDestination = PointerOffset(pDestinationPointer, Literal(destinationOffset)); + int byteCount = count * sizeof(ValueType); + _pEmitter->MemorySet(pDestination, value, Literal(byteCount)); + } + + template + void IRFunctionEmitter::MemorySet(LLVMValue pDestinationPointer, LLVMValue pDestinationOffset, LLVMValue value, int count) + { + auto pDestination = PointerOffset(pDestinationPointer, pDestinationOffset); + int byteCount = count * sizeof(ValueType); + _pEmitter->MemorySet(pDestination, value, Literal(byteCount)); + } + + template + void IRFunctionEmitter::MemorySet(LLVMValue pDestinationPointer, LLVMValue pDestinationOffset, LLVMValue value, LLVMValue count) + { + auto pDestination = PointerOffset(pDestinationPointer, pDestinationOffset); + auto byteCount = Operator(emitters::TypedOperator::multiply, count, Literal(sizeof(ValueType))); + _pEmitter->MemorySet(pDestination, value, byteCount); + } + + template + void IRFunctionEmitter::ShiftAndUpdate(LLVMValue buffer, int bufferSize, int shiftCount, LLVMValue pNewData, LLVMValue pShiftedData) + { + assert(buffer != nullptr); + assert(shiftCount <= bufferSize); + + if (pShiftedData != nullptr) + { + MemoryCopy(buffer, 0, pShiftedData, 0, shiftCount); + } + if (shiftCount < bufferSize) + { + MemoryMove(buffer, shiftCount, 0, (bufferSize - shiftCount)); + } + MemoryCopy(pNewData, 0, buffer, (bufferSize - shiftCount), shiftCount); + } + + template + void IRFunctionEmitter::RegisterFunctionArgs(const ArgsListType& args) + { + auto argumentsIterator = Arguments().begin(); + for (size_t i = 0; i < args.size(); ++i) + { + auto arg = &(*argumentsIterator); + _locals.Add(args[i].first, arg); + ++argumentsIterator; + } + } +} // namespace emitters +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/emitters/include/IRLocalScalar.h b/libraries/emitters/include/IRLocalScalar.h index 23e37c0cc..ff9e59238 100644 --- a/libraries/emitters/include/IRLocalScalar.h +++ b/libraries/emitters/include/IRLocalScalar.h @@ -205,4 +205,261 @@ namespace emitters } // namespace emitters } // namespace ell -#include "../tcc/IRLocalScalar.tcc" +#pragma region implementation + +namespace ell +{ +namespace emitters +{ + namespace detail + { + IREmitter& GetEmitter(IRFunctionEmitter& function); + + template + IRLocalScalar ToIRLocalScalar(IRFunctionEmitter& function, ValueType value) + { + return { function, GetEmitter(function).Literal(value) }; + } + + template = true> + ValueType GetConstantIntValue(llvm::ConstantInt* intValue) + { + return static_cast(intValue->getSExtValue()); + } + + template = true> + ValueType GetConstantIntValue(llvm::ConstantInt* intValue) + { + return static_cast(intValue->getZExtValue()); + } + } // namespace detail + + template /* = true*/> + ValueType IRLocalScalar::GetIntValue() const + { + auto intValue = llvm::cast(this->value); + return detail::GetConstantIntValue(intValue); + } + + template /* = true*/> + ValueType IRLocalScalar::GetIntValue(ValueType defaultValue) const + { + if (IsConstantInt()) + { + return GetIntValue(); + } + + return defaultValue; + } + + template /* = true*/> + ValueType IRLocalScalar::GetFloatValue(ValueType defaultValue) const + { + if (IsConstantFloat()) + { + return GetFloatValue(); + } + return defaultValue; + } + + template /* = true*/> + IRLocalScalar operator+(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a + b; + } + + template /* = true*/> + IRLocalScalar operator+(IRLocalScalar a, 
ValueType value) + { + return value + a; + } + + template /* = true*/> + IRLocalScalar operator-(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a - b; + } + + template /* = true*/> + IRLocalScalar operator-(IRLocalScalar a, ValueType value) + { + auto b = detail::ToIRLocalScalar(a.function, value); + return a - b; + } + + template /* = true*/> + IRLocalScalar operator*(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a * b; + } + + template /* = true*/> + IRLocalScalar operator*(IRLocalScalar a, ValueType value) + { + return value * a; + } + + template /* = true*/> + IRLocalScalar operator/(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a / b; + } + + template /* = true*/> + IRLocalScalar operator/(IRLocalScalar a, ValueType value) + { + auto b = detail::ToIRLocalScalar(a.function, value); + return a / b; + } + + template /* = true*/> + IRLocalScalar operator%(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a % b; + } + + template /* = true*/> + IRLocalScalar operator%(IRLocalScalar a, ValueType value) + { + auto b = detail::ToIRLocalScalar(a.function, value); + return a % b; + } + + template /* = true*/> + IRLocalScalar operator==(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a == b; + } + + template /* = true*/> + IRLocalScalar operator==(IRLocalScalar a, ValueType value) + { + return value == a; + } + + template /* = true*/> + IRLocalScalar operator!=(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a != b; + } + + template /* = true*/> + IRLocalScalar operator!=(IRLocalScalar a, ValueType value) + { + return value != a; + } + + template /* = true*/> + IRLocalScalar operator<(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a < b; + } + + template /* = true*/> + IRLocalScalar operator<(IRLocalScalar a, ValueType value) + { + auto b = detail::ToIRLocalScalar(a.function, value); + return a < b; + } + + template /* = true*/> + IRLocalScalar operator<=(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a <= b; + } + + template /* = true*/> + IRLocalScalar operator<=(IRLocalScalar a, ValueType value) + { + auto b = detail::ToIRLocalScalar(a.function, value); + return a <= b; + } + + template /* = true*/> + IRLocalScalar operator>(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a > b; + } + + template /* = true*/> + IRLocalScalar operator>(IRLocalScalar a, ValueType value) + { + auto b = detail::ToIRLocalScalar(a.function, value); + return a > b; + } + + template /* = true*/> + IRLocalScalar operator>=(ValueType value, IRLocalScalar b) + { + auto a = detail::ToIRLocalScalar(b.function, value); + return a >= b; + } + + template /* = true*/> + IRLocalScalar operator>=(IRLocalScalar a, ValueType value) + { + auto b = detail::ToIRLocalScalar(a.function, value); + return a >= b; + } + + // + // Math functions + // + template + IRLocalScalar Sigmoid(IRLocalScalar a) + { + auto& fn = a.function; + auto& emitter = detail::GetEmitter(fn); + + auto expInput = Exp(a); + constexpr auto one = static_cast(1); + auto result = emitter.Select(a > ValueType{ 0 }, one / (Exp(-a) + one), expInput / (expInput + one)); + return { fn, 
result }; + } + + template + IRLocalScalar Tanh(IRLocalScalar a) + { + // tanh(x) === (exp(x) - exp(-x)) / (exp(x) + exp(-x)) + // = 2*sigmoid(2*x) - 1 + auto two = static_cast(2.0); + return (two * Sigmoid(two * a)) - static_cast(1); + } + + template /* = true*/> + IRLocalScalar Min(ValueType value, IRLocalScalar b) + { + return Min(detail::ToIRLocalScalar(b.function, value), b); + } + + template /* = true*/> + IRLocalScalar Min(IRLocalScalar a, ValueType value) + { + return Min(value, a); + } + + template /* = true*/> + IRLocalScalar Max(ValueType value, IRLocalScalar b) + { + return Max(detail::ToIRLocalScalar(b.function, value), b); + } + + template /* = true*/> + IRLocalScalar Max(IRLocalScalar a, ValueType value) + { + return Max(value, a); + } +} // namespace emitters +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/emitters/include/IRModuleEmitter.h b/libraries/emitters/include/IRModuleEmitter.h index 757de145f..7d6825901 100644 --- a/libraries/emitters/include/IRModuleEmitter.h +++ b/libraries/emitters/include/IRModuleEmitter.h @@ -763,4 +763,200 @@ namespace emitters } // namespace emitters } // namespace ell -#include "../tcc/IRModuleEmitter.tcc" +#pragma region implementation + +namespace ell +{ +namespace emitters +{ + // + // Public methods + // + template + llvm::GlobalVariable* IRModuleEmitter::Constant(const std::string& name, ValueType value) + { + return AddGlobal(name, _emitter.Type(GetVariableType()), _emitter.Literal(value), true); + } + + template + llvm::GlobalVariable* IRModuleEmitter::Global(const std::string& name, ValueType value) + { + return AddGlobal(name, _emitter.Type(GetVariableType()), _emitter.Literal(value), false); + } + + template + llvm::GlobalVariable* IRModuleEmitter::ConstantArray(const std::string& name, const std::vector& value) + { + return AddGlobal(name, _emitter.ArrayType(GetVariableType(), value.size()), _emitter.Literal(value), true); + } + + template + llvm::GlobalVariable* IRModuleEmitter::GlobalArray(const std::string& name, size_t size) + { + return GlobalArray(GetVariableType(), name, size); + } + + template + llvm::GlobalVariable* IRModuleEmitter::GlobalArray(const std::string& name, const std::vector& value) + { + return AddGlobal(name, _emitter.ArrayType(GetVariableType(), value.size()), _emitter.Literal(value), false); + } + + // + // Private methods + // + + template + LLVMValue IRModuleEmitter::EmitVariable(Variable& var) + { + // TODO: have a more specific check to see if the variable is mapped to a port, rather than if it's a function input/output + if (var.IsScalar() && (var.Scope() != VariableScope::input && var.Scope() != VariableScope::output)) + { + return EmitScalar(var); + } + else if (var.IsVector()) + { + return EmitVector(var); + } + else + { + throw EmitterException(EmitterError::variableTypeNotSupported); + } + } + + template + LLVMValue IRModuleEmitter::EmitScalar(Variable& var) + { + LLVMValue pVal = nullptr; + switch (var.Scope()) + { + case VariableScope::literal: + pVal = EmitLiteral(static_cast&>(var)); + _literals.Add(var.EmittedName(), pVal); + break; + + case VariableScope::local: + if (var.IsVectorRef()) + { + pVal = EmitRef(static_cast&>(var)); + } + else if (var.HasInitValue()) + { + pVal = EmitLocal(static_cast&>(var)); + } + else + { + pVal = EmitLocal(static_cast&>(var)); + } + break; + + case VariableScope::global: + pVal = EmitGlobal(static_cast&>(var)); + break; + + default: + throw EmitterException(EmitterError::variableScopeNotSupported); + } + return pVal; + } + + 
template + LLVMValue IRModuleEmitter::EmitVector(Variable& var) + { + LLVMValue pVal = nullptr; + switch (var.Scope()) + { + case VariableScope::literal: + pVal = EmitLiteralVector(static_cast&>(var)); + _literals.Add(var.EmittedName(), pVal); + break; + + case VariableScope::global: + if (var.HasInitValue()) + { + pVal = EmitGlobalVector(static_cast&>(var)); + } + else + { + pVal = EmitGlobalVector(static_cast&>(var)); + } + _globals.Add(var.EmittedName(), pVal); + break; + + default: + throw EmitterException(EmitterError::variableScopeNotSupported); + } + assert(pVal != nullptr); + return pVal; + } + + template + LLVMValue IRModuleEmitter::EmitLiteral(LiteralVariable& var) + { + auto& currentFunction = GetCurrentFunction(); + LLVMValue pVar = currentFunction.Literal(var.Data()); + return pVar; + } + + template + LLVMValue IRModuleEmitter::EmitGlobal(InitializedScalarVariable& var) + { + auto& currentFunction = GetCurrentFunction(); + LLVMValue pVal = nullptr; + if (var.IsMutable()) + { + pVal = Global(var.Type(), var.EmittedName()); + currentFunction.Store(pVal, currentFunction.Literal(var.Data())); + } + else + { + pVal = Constant(var.Type(), var.EmittedName(), var.Data()); + } + return pVal; + } + + template + LLVMValue IRModuleEmitter::EmitLocal(ScalarVariable& var) + { + auto& currentFunction = GetCurrentFunction(); + return currentFunction.EmittedVariable(var.Type(), var.EmittedName()); + } + + template + LLVMValue IRModuleEmitter::EmitLocal(InitializedScalarVariable& var) + { + auto& currentFunction = GetCurrentFunction(); + LLVMValue pVar = currentFunction.EmittedVariable(var.Type(), var.EmittedName()); + currentFunction.Store(pVar, currentFunction.Literal(var.Data())); + return pVar; + } + + template + LLVMValue IRModuleEmitter::EmitLiteralVector(LiteralVectorVariable& var) + { + return ConstantArray(var.EmittedName(), var.Data()); + } + + template + LLVMValue IRModuleEmitter::EmitGlobalVector(VectorVariable& var) + { + return GlobalArray(GetVariableType(), var.EmittedName(), var.Dimension()); + } + + template + LLVMValue IRModuleEmitter::EmitGlobalVector(InitializedVectorVariable& var) + { + return GlobalArray(var.EmittedName(), var.Data()); + } + + template + LLVMValue IRModuleEmitter::EmitRef(VectorElementVariable& var) + { + auto& currentFunction = GetCurrentFunction(); + LLVMValue pSrcVar = EnsureEmitted(var.Src()); + return currentFunction.PtrOffsetA(pSrcVar, currentFunction.Literal(var.Offset()), var.EmittedName()); + } +} // namespace emitters +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/emitters/include/IRRuntime.h b/libraries/emitters/include/IRRuntime.h index f20ce5953..461304896 100644 --- a/libraries/emitters/include/IRRuntime.h +++ b/libraries/emitters/include/IRRuntime.h @@ -180,4 +180,79 @@ namespace emitters } // namespace emitters } // namespace ell -#include "../tcc/IRRuntime.tcc" +#pragma region implementation + +namespace ell +{ +namespace emitters +{ + template + LLVMFunction IRRuntime::GetSqrtFunction() + { + return GetSqrtFunction(GetVariableType()); + } + + template + LLVMFunction IRRuntime::GetAbsFunction() + { + return GetAbsFunction(GetVariableType()); + } + + template + LLVMFunction IRRuntime::GetExpFunction() + { + return GetExpFunction(GetVariableType()); + } + + template + LLVMFunction IRRuntime::GetLogFunction() + { + return GetLogFunction(GetVariableType()); + } + + template + LLVMFunction IRRuntime::GetTanhFunction() + { + return GetTanhFunction(GetVariableType()); + } + + template + LLVMFunction 
IRRuntime::GetSinFunction()
+    {
+        return GetSinFunction(GetVariableType<ValueType>());
+    }
+
+    template <typename ValueType>
+    LLVMFunction IRRuntime::GetCosFunction()
+    {
+        return GetCosFunction(GetVariableType<ValueType>());
+    }
+
+    template <typename ValueType>
+    LLVMFunction IRRuntime::GetDotProductFunction()
+    {
+        if (std::is_integral<std::decay_t<ValueType>>::value)
+        {
+            if (_dotProductFunction == nullptr)
+            {
+                _dotProductFunction = GetDotProductIntFunction();
+            }
+            return _dotProductFunction;
+        }
+        else if (std::is_floating_point<std::decay_t<ValueType>>::value)
+        {
+            if (_dotProductFunctionFloat == nullptr)
+            {
+                _dotProductFunctionFloat = GetDotProductFloatFunction();
+            }
+            return _dotProductFunctionFloat;
+        }
+        else
+        {
+            throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
+        }
+    }
+} // namespace emitters
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/emitters/include/IRVectorUtilities.h b/libraries/emitters/include/IRVectorUtilities.h
index 5e73d6e3a..4b9f7cdf0 100644
--- a/libraries/emitters/include/IRVectorUtilities.h
+++ b/libraries/emitters/include/IRVectorUtilities.h
@@ -54,4 +54,80 @@ namespace emitters
 } // namespace emitters
 } // namespace ell
 
-#include "../tcc/IRVectorUtilities.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace emitters
+{
+    template <typename ValueType, utilities::IsFloatingPoint<ValueType>>
+    LLVMValue FillVector(IRFunctionEmitter& function, llvm::VectorType* type, ValueType elementValue)
+    {
+        return llvm::ConstantFP::get(type, elementValue);
+    }
+
+    template <typename ValueType, utilities::IsIntegral<ValueType>>
+    LLVMValue FillVector(IRFunctionEmitter& function, llvm::VectorType* type, ValueType elementValue)
+    {
+        return llvm::ConstantInt::get(type, elementValue, true);
+    }
+
+    // Emit explicit vectorized code to compute the sum of all the elements in a vector.
+    // Hopefully, the vectorizing optimizer will take care of this when vectorizing simple
+    // loops to sum up values, but for other operations we may want to do it ourselves.
+    //
+    // Runs in log N time by recursively splitting the vector in half and summing the halves.
+    // Example:
+    //   <1, 2, 3, 4, 5, 6, 7, 8> --> <1, 2, 3, 4> + <5, 6, 7, 8> ( == <6, 8, 10, 12> )
+    //   <6, 8, 10, 12> --> <6, 8> + <10, 12> ( == <16, 20> )
+    //   <16, 20> --> 16 + 20 ( == 36 )
+    template <typename ValueType>
+    LLVMValue HorizontalVectorSum(IRFunctionEmitter& function, LLVMValue vectorValue)
+    {
+        LLVMType type = vectorValue->getType();
+
+        // Allow calling HorizontalVectorSum to be a no-op on a scalar
+        if (!type->isVectorTy())
+        {
+            return vectorValue;
+        }
+
+        llvm::VectorType* vecType = llvm::cast<llvm::VectorType>(type);
+        assert(vecType != nullptr);
+
+        int vectorSize = vecType->getNumElements();
+        IREmitter& emitter = function.GetEmitter();
+
+        // Take care of the edge case of 1-element vectors
+        if (vectorSize == 1)
+        {
+            return emitter.GetIRBuilder().CreateExtractElement(vectorValue, static_cast<uint64_t>(0));
+        }
+
+        // Repeatedly split the vector into two halves, and add the two halves together
+        auto undef = llvm::UndefValue::get(type); // This undef is to tell LLVM we don't care what goes in the second operand of the shufflevector instruction
+        while (vectorSize > 2)
+        {
+            assert(vectorSize % 2 == 0); // vectorSize must be a power of 2
+            std::vector<uint32_t> elementIndices1;
+            std::vector<uint32_t> elementIndices2;
+            for (int index = 0; index < vectorSize / 2; ++index)
+            {
+                elementIndices1.push_back(index); // Collect indices [0, vectorSize/2)
+                elementIndices2.push_back((vectorSize / 2) + index); // Collect indices [vectorSize/2, vectorSize)
+            }
+            auto half1 = emitter.GetIRBuilder().CreateShuffleVector(vectorValue, undef, elementIndices1); // Extract elements [0, vectorSize/2)
+            auto half2 = emitter.GetIRBuilder().CreateShuffleVector(vectorValue, undef, elementIndices2); // Extract elements [vectorSize/2, vectorSize)
+            vectorValue = function.Operator(emitters::GetAddForValueType<ValueType>(), half1, half2);
+            vectorSize /= 2;
+        }
+
+        assert(vectorSize == 2);
+        auto half1 = emitter.GetIRBuilder().CreateExtractElement(vectorValue, static_cast<uint64_t>(0));
+        auto half2 = emitter.GetIRBuilder().CreateExtractElement(vectorValue, static_cast<uint64_t>(1));
+        return function.Operator(emitters::GetAddForValueType<ValueType>(), half1, half2);
+    }
+} // namespace emitters
+} // namespace ell
+
+#pragma endregion implementation
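
[Editor's note: the shufflevector loop in HorizontalVectorSum above performs the same computation as the scalar sketch below: split the vector in half, add the halves elementwise, and repeat until one lane remains. Illustrative code only, not part of the library.]

    #include <cassert>
    #include <utility>
    #include <vector>

    // Log N pairwise reduction; size must be a power of 2.
    float PairwiseSum(std::vector<float> v)
    {
        assert(!v.empty() && (v.size() & (v.size() - 1)) == 0);
        while (v.size() > 1)
        {
            std::vector<float> half(v.size() / 2);
            for (size_t i = 0; i < half.size(); ++i)
                half[i] = v[i] + v[i + half.size()]; // add upper half onto lower half
            v = std::move(half);
        }
        return v[0]; // e.g. {1,2,3,4,5,6,7,8} -> {6,8,10,12} -> {16,20} -> 36
    }
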
diff --git a/libraries/emitters/include/ScalarVariable.h b/libraries/emitters/include/ScalarVariable.h
index fe6a12c6f..51f7c724d 100644
--- a/libraries/emitters/include/ScalarVariable.h
+++ b/libraries/emitters/include/ScalarVariable.h
@@ -76,4 +76,40 @@ namespace emitters
 } // namespace emitters
 } // namespace ell
 
-#include "../tcc/ScalarVariable.tcc"
\ No newline at end of file
+#pragma region implementation
+
+namespace ell
+{
+namespace emitters
+{
+    template <typename T>
+    ScalarVariable<T>::ScalarVariable(const VariableScope scope, int flags) :
+        Variable(GetVariableType<T>(), scope, flags)
+    {
+    }
+
+    template <typename T>
+    LiteralVariable<T>::LiteralVariable(T data) :
+        ScalarVariable<T>(VariableScope::literal, Variable::VariableFlags::none),
+        _data(data)
+    {
+    }
+
+    template <typename T>
+    InitializedScalarVariable<T>::InitializedScalarVariable(const VariableScope scope, T data, bool isMutable) :
+        ScalarVariable<T>(scope, isMutable ? 
(Variable::VariableFlags::isMutable | Variable::VariableFlags::hasInitValue) : Variable::VariableFlags::hasInitValue), + _data(data) + { + } + + template + VectorElementVariable::VectorElementVariable(Variable& src, int offset) : + ScalarVariable(VariableScope::local, Variable::VariableFlags::isVectorRef), + _src(src), + _offset(offset) + { + } +} // namespace emitters +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/emitters/include/SymbolTable.h b/libraries/emitters/include/SymbolTable.h index a3d402e30..9f4a6f3da 100644 --- a/libraries/emitters/include/SymbolTable.h +++ b/libraries/emitters/include/SymbolTable.h @@ -70,4 +70,79 @@ namespace emitters }; } // namespace emitters } // namespace ell -#include "../tcc/SymbolTable.tcc" + +#pragma region implementation + +namespace ell +{ +namespace emitters +{ + template + SymbolTable::SymbolTable(std::initializer_list values) : + _map(values) + { + } + + template + ValueType SymbolTable::Get(const std::string& name) const + { + ValueType value = DefaultValue; + auto search = _map.find(name); + if (search != _map.end()) + { + value = search->second; + } + return value; + } + + template + void SymbolTable::Add(const std::string& name, ValueType value) + { + if (_map.count(name) > 0) + { + throw EmitterException(EmitterError::duplicateSymbol); + } + _map[std::move(name)] = std::move(value); + } + + template + std::string SymbolTable::GetUniqueName(const std::string& namePrefix) const + { + // return namePrefix; + int index = 0; + while (true) + { + auto name = namePrefix + "_" + std::to_string(index); + if (!Contains(name)) + { + return name; + } + ++index; + } + } + + template + bool SymbolTable::Contains(const std::string& name) const + { + return (Get(name) != DefaultValue); + } + + template + void SymbolTable::Remove(const std::string& name) + { + auto search = _map.find(name); + if (search != _map.end()) + { + _map.erase(search); + } + } + + template + void SymbolTable::Clear() + { + _map.clear(); + } +} // namespace emitters +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/emitters/include/Variable.h b/libraries/emitters/include/Variable.h index b197c67c5..139fa54e4 100644 --- a/libraries/emitters/include/Variable.h +++ b/libraries/emitters/include/Variable.h @@ -193,4 +193,72 @@ namespace emitters } // namespace emitters } // namespace ell -#include "../tcc/Variable.tcc" +#pragma region implementation + +namespace ell +{ +namespace emitters +{ + + template + class ScalarVariable; + + template + class InitializedScalarVariable; + + template + class VectorVariable; + + template + class InitializedVectorVariable; + + // + // VariableAllocator + // + template + VarType* VariableAllocator::AddVariable(Args&&... 
args) + { + static_assert(std::is_base_of::value, "AddVariable requires you inherit from Variable"); + + auto var = std::make_shared(args...); + VarType* pVar = var.get(); + _variables.push_back(var); + return pVar; + } + + template + Variable* VariableAllocator::AddScalarVariable(VariableScope scope) + { + return AddVariable>(scope); + } + + template + Variable* VariableAllocator::AddScalarVariable(VariableScope scope, ElementType value) + { + return AddVariable>(scope, value); + } + + template + Variable* VariableAllocator::AddVectorVariable(VariableScope scope, int size) + { + return AddVariable>(scope, size); + } + + /// Add a vector, with all elements initialized to a given value + template + Variable* VariableAllocator::AddVectorVariable(VariableScope scope, int size, ElementType value) + { + std::vector data(size, value); + return AddVariable>(scope, data); + } + + /// Add a vector, initialized to a given vector + template + Variable* VariableAllocator::AddVectorVariable(VariableScope scope, const std::vector& data) + { + return AddVariable>(scope, data); + } +} // namespace emitters +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/emitters/include/VectorVariable.h b/libraries/emitters/include/VectorVariable.h index fd3089653..287e4c851 100644 --- a/libraries/emitters/include/VectorVariable.h +++ b/libraries/emitters/include/VectorVariable.h @@ -78,4 +78,53 @@ namespace emitters } // namespace emitters } // namespace ell -#include "../tcc/VectorVariable.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace emitters +{ + // + // VectorVariable + // + template + VectorVariable::VectorVariable(const VariableScope scope, const size_t size, int flags) : + Variable(GetVariableType(), scope, flags), + _size(size) + { + } + + // + // InitializedVectorVariable + // + template + InitializedVectorVariable::InitializedVectorVariable(const VariableScope scope, const std::vector& data, int flags) : + VectorVariable(scope, data.size(), flags | Variable::VariableFlags::hasInitValue) + { + _initialData = VariableValueType::ToVariableVector(data); + } + + template + InitializedVectorVariable::InitializedVectorVariable(const VariableScope scope, size_t size, int flags) : + VectorVariable(scope, size, flags | Variable::VariableFlags::hasInitValue) + { + T defValue = GetDefaultValue(); + for (size_t i = 0; i < size; ++i) + { + _initialData.emplace_back(defValue); + } + } + + // + // LiteralVectorVariable + // + template + LiteralVectorVariable::LiteralVectorVariable(const std::vector& data) : + VectorVariable(VariableScope::literal, data.size(), Variable::VariableFlags::none) + { + _data = VariableValueType::ToVariableVector(data); + } +} // namespace emitters +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/emitters/tcc/CompilableIRFunction.tcc b/libraries/emitters/tcc/CompilableIRFunction.tcc deleted file mode 100644 index 35e9b359d..000000000 --- a/libraries/emitters/tcc/CompilableIRFunction.tcc +++ /dev/null @@ -1,26 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: CompilableIRFunction.tcc (emitters) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - template - ValueType IRAddFunction::Compute(ValueType x, ValueType y) const - { - return x + y; - } - - 
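
[Editor's note: this deleted block is the same IRAddFunction implementation inlined into CompilableIRFunction.h earlier in this diff. A hypothetical use of the host-side path, assuming the ELL emitters headers are available; names as they appear in this diff:]

    ell::emitters::IRAddFunction<double> add;
    double sum = add.Compute(2.5, 4.0); // 6.5, evaluated on the host
    // add.Compile(function, x, y) would instead emit the equivalent IR add
    // into an IRFunctionEmitter and return the resulting LLVMValue.
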
template - LLVMValue IRAddFunction::Compile(IRFunctionEmitter& function, LLVMValue x, LLVMValue y) const - { - LLVMValue sum = function.Operator(emitters::GetAddForValueType(), x, y); - return sum; - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/EmitterTypes.tcc b/libraries/emitters/tcc/EmitterTypes.tcc deleted file mode 100644 index 5b6ba07c6..000000000 --- a/libraries/emitters/tcc/EmitterTypes.tcc +++ /dev/null @@ -1,38 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: EmitterTypes.tcc (emitter) -// Authors: Umesh Madan, Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - template - std::vector::DestType> VariableValueType::ToVariableVector(const std::vector& src) - { - return src; - } - - template - std::vector VariableValueType::FromVariableVector(const std::vector::DestType>& src) - { - return src; - } - - // bool specialization - inline std::vector::DestType> VariableValueType::ToVariableVector(const std::vector& src) - { - std::vector::DestType> result(src.begin(), src.end()); - return result; - } - - inline std::vector VariableValueType::FromVariableVector(const std::vector::DestType>& src) - { - std::vector result(src.begin(), src.end()); - return result; - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/IREmitter.tcc b/libraries/emitters/tcc/IREmitter.tcc deleted file mode 100644 index 83f090530..000000000 --- a/libraries/emitters/tcc/IREmitter.tcc +++ /dev/null @@ -1,45 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IREmitter.tcc (emitters) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - template - LLVMValue IREmitter::CastValue(LLVMValue pValue) - { - auto outputType = GetVariableType(); - return CastValue(pValue, outputType); - } - - template - LLVMValue IREmitter::CastUnsignedValue(LLVMValue pValue) - { - auto outputType = GetVariableType(); - return CastUnsignedValue(pValue, outputType); - } - - template - llvm::Constant* IREmitter::Pointer(ValueType* ptr) - { - auto ptrValue = Literal(reinterpret_cast(ptr)); - auto ptrType = PointerType(GetVariableType()); - return llvm::ConstantExpr::getIntToPtr(ptrValue, ptrType); - } - - template - void IREmitter::BindArgumentNames(LLVMFunction pFunction, const ListType& arguments) - { - size_t i = 0; - for (auto& argument : pFunction->args()) - { - argument.setName(arguments[i++].first); - } - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/IRFunctionEmitter.tcc b/libraries/emitters/tcc/IRFunctionEmitter.tcc deleted file mode 100644 index 907219a20..000000000 --- a/libraries/emitters/tcc/IRFunctionEmitter.tcc +++ /dev/null @@ -1,174 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IRFunctionEmitter.tcc (emitters) -// Authors: Umesh Madan, Chuck Jacobs, Kern Handa -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - template > - IRLocalScalar 
IRFunctionEmitter::LocalScalar(ValueType value) - { - return IRLocalScalar(*this, Literal(value)); - } - - template - LLVMValue IRFunctionEmitter::Literal(ValueType value) - { - return _pEmitter->Literal(value); - } - - template - LLVMValue IRFunctionEmitter::Pointer(ValueType* value) - { - return _pEmitter->Pointer(value); - } - - template - LLVMValue IRFunctionEmitter::CastValue(LLVMValue pValue) - { - return _pEmitter->CastValue(pValue); - } - - template - LLVMValue IRFunctionEmitter::CastUnsignedValue(LLVMValue pValue) - { - return _pEmitter->CastValue(pValue); - } - - template - void IRFunctionEmitter::VectorOperator(TypedOperator type, size_t size, ValueType leftValue, LLVMValue pRightValue, std::function aggregator) - { - assert(pRightValue != nullptr); - - LLVMValue pLeftItem = Literal(leftValue); - For(size, [pLeftItem, pRightValue, type, aggregator](IRFunctionEmitter& fn, LLVMValue i) { - LLVMValue pRightItem = fn.ValueAt(pRightValue, i); - LLVMValue pTemp = fn.Operator(type, pLeftItem, pRightItem); - aggregator(i, pTemp); - }); - } - - template - void IRFunctionEmitter::VectorOperator(TypedOperator type, size_t size, LLVMValue pLeftValue, ValueType rightValue, std::function aggregator) - { - assert(pLeftValue != nullptr); - - LLVMValue pRightItem = Literal(rightValue); - For(size, [pLeftValue, pRightItem, type, aggregator](IRFunctionEmitter& fn, LLVMValue i) { - LLVMValue pLeftItem = fn.ValueAt(pLeftValue, i); - LLVMValue pTemp = fn.Operator(type, pLeftItem, pRightItem); - aggregator(i, pTemp); - }); - } - - template - LLVMValue IRFunctionEmitter::Malloc(int64_t size) - { - return Malloc(GetVariableType(), size); - } - - template - void IRFunctionEmitter::MemoryMove(LLVMValue pPointer, int sourceOffset, int destinationOffset, int count) - { - assert(pPointer != nullptr); - auto pSource = PointerOffset(pPointer, Literal(sourceOffset)); - auto pDestination = PointerOffset(pPointer, Literal(destinationOffset)); - int byteCount = count * sizeof(ValueType); - _pEmitter->MemoryMove(pSource, pDestination, Literal(byteCount)); - } - - template - void IRFunctionEmitter::MemoryCopy(LLVMValue pSourcePointer, LLVMValue pDestinationPointer, int count) - { - auto pSource = PointerOffset(pSourcePointer, 0); - auto pDestination = PointerOffset(pDestinationPointer, 0); - auto byteCount = count * sizeof(ValueType); - _pEmitter->MemoryCopy(pSource, pDestination, Literal(byteCount)); - } - - template - void IRFunctionEmitter::MemoryCopy(LLVMValue pSourcePointer, LLVMValue pDestinationPointer, LLVMValue count) - { - auto pSource = PointerOffset(pSourcePointer, 0); - auto pDestination = PointerOffset(pDestinationPointer, 0); - auto byteCount = Operator(emitters::TypedOperator::multiply, count, Literal(sizeof(ValueType))); - _pEmitter->MemoryCopy(pSource, pDestination, byteCount); - } - - template - void IRFunctionEmitter::MemoryCopy(LLVMValue pSourcePointer, int sourceOffset, LLVMValue pDestinationPointer, int destinationOffset, int count) - { - auto pSource = PointerOffset(pSourcePointer, Literal(sourceOffset)); - auto pDestination = PointerOffset(pDestinationPointer, Literal(destinationOffset)); - int byteCount = count * sizeof(ValueType); - _pEmitter->MemoryCopy(pSource, pDestination, Literal(byteCount)); - } - - template - void IRFunctionEmitter::MemoryCopy(LLVMValue pSourcePointer, LLVMValue sourceOffset, LLVMValue pDestinationPointer, LLVMValue destinationOffset, LLVMValue count) - { - auto pSource = PointerOffset(pSourcePointer, sourceOffset); - auto pDestination = 
PointerOffset(pDestinationPointer, destinationOffset); - auto byteCount = Operator(emitters::TypedOperator::multiply, count, Literal(sizeof(ValueType))); - _pEmitter->MemoryCopy(pSource, pDestination, byteCount); - } - - template - void IRFunctionEmitter::MemorySet(LLVMValue pDestinationPointer, int destinationOffset, LLVMValue value, int count) - { - auto pDestination = PointerOffset(pDestinationPointer, Literal(destinationOffset)); - int byteCount = count * sizeof(ValueType); - _pEmitter->MemorySet(pDestination, value, Literal(byteCount)); - } - - template - void IRFunctionEmitter::MemorySet(LLVMValue pDestinationPointer, LLVMValue pDestinationOffset, LLVMValue value, int count) - { - auto pDestination = PointerOffset(pDestinationPointer, pDestinationOffset); - int byteCount = count * sizeof(ValueType); - _pEmitter->MemorySet(pDestination, value, Literal(byteCount)); - } - - template - void IRFunctionEmitter::MemorySet(LLVMValue pDestinationPointer, LLVMValue pDestinationOffset, LLVMValue value, LLVMValue count) - { - auto pDestination = PointerOffset(pDestinationPointer, pDestinationOffset); - auto byteCount = Operator(emitters::TypedOperator::multiply, count, Literal(sizeof(ValueType))); - _pEmitter->MemorySet(pDestination, value, byteCount); - } - - template - void IRFunctionEmitter::ShiftAndUpdate(LLVMValue buffer, int bufferSize, int shiftCount, LLVMValue pNewData, LLVMValue pShiftedData) - { - assert(buffer != nullptr); - assert(shiftCount <= bufferSize); - - if (pShiftedData != nullptr) - { - MemoryCopy(buffer, 0, pShiftedData, 0, shiftCount); - } - if (shiftCount < bufferSize) - { - MemoryMove(buffer, shiftCount, 0, (bufferSize - shiftCount)); - } - MemoryCopy(pNewData, 0, buffer, (bufferSize - shiftCount), shiftCount); - } - - template - void IRFunctionEmitter::RegisterFunctionArgs(const ArgsListType& args) - { - auto argumentsIterator = Arguments().begin(); - for (size_t i = 0; i < args.size(); ++i) - { - auto arg = &(*argumentsIterator); - _locals.Add(args[i].first, arg); - ++argumentsIterator; - } - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/IRLocalScalar.tcc b/libraries/emitters/tcc/IRLocalScalar.tcc deleted file mode 100644 index ee318287e..000000000 --- a/libraries/emitters/tcc/IRLocalScalar.tcc +++ /dev/null @@ -1,262 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IRLocalValueOperations.tcc (emitters) -// Authors: Kern Handa -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - namespace detail - { - IREmitter& GetEmitter(IRFunctionEmitter& function); - - template - IRLocalScalar ToIRLocalScalar(IRFunctionEmitter& function, ValueType value) - { - return { function, GetEmitter(function).Literal(value) }; - } - - template = true> - ValueType GetConstantIntValue(llvm::ConstantInt* intValue) - { - return static_cast(intValue->getSExtValue()); - } - - template = true> - ValueType GetConstantIntValue(llvm::ConstantInt* intValue) - { - return static_cast(intValue->getZExtValue()); - } - } // namespace detail - - template /* = true*/> - ValueType IRLocalScalar::GetIntValue() const - { - auto intValue = llvm::cast(this->value); - return detail::GetConstantIntValue(intValue); - } - - template /* = true*/> - ValueType IRLocalScalar::GetIntValue(ValueType defaultValue) const - { - if (IsConstantInt()) - { - return GetIntValue(); 
- } - - return defaultValue; - } - - template /* = true*/> - ValueType IRLocalScalar::GetFloatValue(ValueType defaultValue) const - { - if (IsConstantFloat()) - { - return GetFloatValue(); - } - return defaultValue; - } - - template /* = true*/> - IRLocalScalar operator+(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a + b; - } - - template /* = true*/> - IRLocalScalar operator+(IRLocalScalar a, ValueType value) - { - return value + a; - } - - template /* = true*/> - IRLocalScalar operator-(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a - b; - } - - template /* = true*/> - IRLocalScalar operator-(IRLocalScalar a, ValueType value) - { - auto b = detail::ToIRLocalScalar(a.function, value); - return a - b; - } - - template /* = true*/> - IRLocalScalar operator*(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a * b; - } - - template /* = true*/> - IRLocalScalar operator*(IRLocalScalar a, ValueType value) - { - return value * a; - } - - template /* = true*/> - IRLocalScalar operator/(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a / b; - } - - template /* = true*/> - IRLocalScalar operator/(IRLocalScalar a, ValueType value) - { - auto b = detail::ToIRLocalScalar(a.function, value); - return a / b; - } - - template /* = true*/> - IRLocalScalar operator%(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a % b; - } - - template /* = true*/> - IRLocalScalar operator%(IRLocalScalar a, ValueType value) - { - auto b = detail::ToIRLocalScalar(a.function, value); - return a % b; - } - - template /* = true*/> - IRLocalScalar operator==(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a == b; - } - - template /* = true*/> - IRLocalScalar operator==(IRLocalScalar a, ValueType value) - { - return value == a; - } - - template /* = true*/> - IRLocalScalar operator!=(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a != b; - } - - template /* = true*/> - IRLocalScalar operator!=(IRLocalScalar a, ValueType value) - { - return value != a; - } - - template /* = true*/> - IRLocalScalar operator<(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a < b; - } - - template /* = true*/> - IRLocalScalar operator<(IRLocalScalar a, ValueType value) - { - auto b = detail::ToIRLocalScalar(a.function, value); - return a < b; - } - - template /* = true*/> - IRLocalScalar operator<=(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a <= b; - } - - template /* = true*/> - IRLocalScalar operator<=(IRLocalScalar a, ValueType value) - { - auto b = detail::ToIRLocalScalar(a.function, value); - return a <= b; - } - - template /* = true*/> - IRLocalScalar operator>(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a > b; - } - - template /* = true*/> - IRLocalScalar operator>(IRLocalScalar a, ValueType value) - { - auto b = detail::ToIRLocalScalar(a.function, value); - return a > b; - } - - template /* = true*/> - IRLocalScalar operator>=(ValueType value, IRLocalScalar b) - { - auto a = detail::ToIRLocalScalar(b.function, value); - return a >= b; - } - - template /* = true*/> - IRLocalScalar 
operator>=(IRLocalScalar a, ValueType value) - { - auto b = detail::ToIRLocalScalar(a.function, value); - return a >= b; - } - - // - // Math functions - // - template - IRLocalScalar Sigmoid(IRLocalScalar a) - { - auto& fn = a.function; - auto& emitter = detail::GetEmitter(fn); - - auto expInput = Exp(a); - constexpr auto one = static_cast(1); - auto result = emitter.Select(a > ValueType{ 0 }, one / (Exp(-a) + one), expInput / (expInput + one)); - return { fn, result }; - } - - template - IRLocalScalar Tanh(IRLocalScalar a) - { - // tanh(x) === (exp(x) - exp(-x)) / (exp(x) + exp(-x)) - // = 2*sigmoid(2*x) - 1 - auto two = static_cast(2.0); - return (two * Sigmoid(two * a)) - static_cast(1); - } - - template /* = true*/> - IRLocalScalar Min(ValueType value, IRLocalScalar b) - { - return Min(detail::ToIRLocalScalar(b.function, value), b); - } - - template /* = true*/> - IRLocalScalar Min(IRLocalScalar a, ValueType value) - { - return Min(value, a); - } - - template /* = true*/> - IRLocalScalar Max(ValueType value, IRLocalScalar b) - { - return Max(detail::ToIRLocalScalar(b.function, value), b); - } - - template /* = true*/> - IRLocalScalar Max(IRLocalScalar a, ValueType value) - { - return Max(value, a); - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/IRModuleEmitter.tcc b/libraries/emitters/tcc/IRModuleEmitter.tcc deleted file mode 100644 index d798d19da..000000000 --- a/libraries/emitters/tcc/IRModuleEmitter.tcc +++ /dev/null @@ -1,201 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IRModuleEmitter.tcc (emitters) -// Authors: Umesh Madan -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - // - // Public methods - // - template - llvm::GlobalVariable* IRModuleEmitter::Constant(const std::string& name, ValueType value) - { - return AddGlobal(name, _emitter.Type(GetVariableType()), _emitter.Literal(value), true); - } - - template - llvm::GlobalVariable* IRModuleEmitter::Global(const std::string& name, ValueType value) - { - return AddGlobal(name, _emitter.Type(GetVariableType()), _emitter.Literal(value), false); - } - - template - llvm::GlobalVariable* IRModuleEmitter::ConstantArray(const std::string& name, const std::vector& value) - { - return AddGlobal(name, _emitter.ArrayType(GetVariableType(), value.size()), _emitter.Literal(value), true); - } - - template - llvm::GlobalVariable* IRModuleEmitter::GlobalArray(const std::string& name, size_t size) - { - return GlobalArray(GetVariableType(), name, size); - } - - template - llvm::GlobalVariable* IRModuleEmitter::GlobalArray(const std::string& name, const std::vector& value) - { - return AddGlobal(name, _emitter.ArrayType(GetVariableType(), value.size()), _emitter.Literal(value), false); - } - - // - // Private methods - // - - template - LLVMValue IRModuleEmitter::EmitVariable(Variable& var) - { - // TODO: have a more specific check to see if the variable is mapped to a port, rather than if it's a function input/output - if (var.IsScalar() && (var.Scope() != VariableScope::input && var.Scope() != VariableScope::output)) - { - return EmitScalar(var); - } - else if (var.IsVector()) - { - return EmitVector(var); - } - else - { - throw EmitterException(EmitterError::variableTypeNotSupported); - } - } - - template - LLVMValue IRModuleEmitter::EmitScalar(Variable& var) - { - LLVMValue pVal = 
nullptr; - switch (var.Scope()) - { - case VariableScope::literal: - pVal = EmitLiteral(static_cast&>(var)); - _literals.Add(var.EmittedName(), pVal); - break; - - case VariableScope::local: - if (var.IsVectorRef()) - { - pVal = EmitRef(static_cast&>(var)); - } - else if (var.HasInitValue()) - { - pVal = EmitLocal(static_cast&>(var)); - } - else - { - pVal = EmitLocal(static_cast&>(var)); - } - break; - - case VariableScope::global: - pVal = EmitGlobal(static_cast&>(var)); - break; - - default: - throw EmitterException(EmitterError::variableScopeNotSupported); - } - return pVal; - } - - template - LLVMValue IRModuleEmitter::EmitVector(Variable& var) - { - LLVMValue pVal = nullptr; - switch (var.Scope()) - { - case VariableScope::literal: - pVal = EmitLiteralVector(static_cast&>(var)); - _literals.Add(var.EmittedName(), pVal); - break; - - case VariableScope::global: - if (var.HasInitValue()) - { - pVal = EmitGlobalVector(static_cast&>(var)); - } - else - { - pVal = EmitGlobalVector(static_cast&>(var)); - } - _globals.Add(var.EmittedName(), pVal); - break; - - default: - throw EmitterException(EmitterError::variableScopeNotSupported); - } - assert(pVal != nullptr); - return pVal; - } - - template - LLVMValue IRModuleEmitter::EmitLiteral(LiteralVariable& var) - { - auto& currentFunction = GetCurrentFunction(); - LLVMValue pVar = currentFunction.Literal(var.Data()); - return pVar; - } - - template - LLVMValue IRModuleEmitter::EmitGlobal(InitializedScalarVariable& var) - { - auto& currentFunction = GetCurrentFunction(); - LLVMValue pVal = nullptr; - if (var.IsMutable()) - { - pVal = Global(var.Type(), var.EmittedName()); - currentFunction.Store(pVal, currentFunction.Literal(var.Data())); - } - else - { - pVal = Constant(var.Type(), var.EmittedName(), var.Data()); - } - return pVal; - } - - template - LLVMValue IRModuleEmitter::EmitLocal(ScalarVariable& var) - { - auto& currentFunction = GetCurrentFunction(); - return currentFunction.EmittedVariable(var.Type(), var.EmittedName()); - } - - template - LLVMValue IRModuleEmitter::EmitLocal(InitializedScalarVariable& var) - { - auto& currentFunction = GetCurrentFunction(); - LLVMValue pVar = currentFunction.EmittedVariable(var.Type(), var.EmittedName()); - currentFunction.Store(pVar, currentFunction.Literal(var.Data())); - return pVar; - } - - template - LLVMValue IRModuleEmitter::EmitLiteralVector(LiteralVectorVariable& var) - { - return ConstantArray(var.EmittedName(), var.Data()); - } - - template - LLVMValue IRModuleEmitter::EmitGlobalVector(VectorVariable& var) - { - return GlobalArray(GetVariableType(), var.EmittedName(), var.Dimension()); - } - - template - LLVMValue IRModuleEmitter::EmitGlobalVector(InitializedVectorVariable& var) - { - return GlobalArray(var.EmittedName(), var.Data()); - } - - template - LLVMValue IRModuleEmitter::EmitRef(VectorElementVariable& var) - { - auto& currentFunction = GetCurrentFunction(); - LLVMValue pSrcVar = EnsureEmitted(var.Src()); - return currentFunction.PtrOffsetA(pSrcVar, currentFunction.Literal(var.Offset()), var.EmittedName()); - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/IRRuntime.tcc b/libraries/emitters/tcc/IRRuntime.tcc deleted file mode 100644 index 014acfe46..000000000 --- a/libraries/emitters/tcc/IRRuntime.tcc +++ /dev/null @@ -1,80 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IRRuntime.tcc (emitters) -// Authors: Umesh Madan -// 
-//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - template - LLVMFunction IRRuntime::GetSqrtFunction() - { - return GetSqrtFunction(GetVariableType()); - } - - template - LLVMFunction IRRuntime::GetAbsFunction() - { - return GetAbsFunction(GetVariableType()); - } - - template - LLVMFunction IRRuntime::GetExpFunction() - { - return GetExpFunction(GetVariableType()); - } - - template - LLVMFunction IRRuntime::GetLogFunction() - { - return GetLogFunction(GetVariableType()); - } - - template - LLVMFunction IRRuntime::GetTanhFunction() - { - return GetTanhFunction(GetVariableType()); - } - - template - LLVMFunction IRRuntime::GetSinFunction() - { - return GetSinFunction(GetVariableType()); - } - - template - LLVMFunction IRRuntime::GetCosFunction() - { - return GetCosFunction(GetVariableType()); - } - - template - LLVMFunction IRRuntime::GetDotProductFunction() - { - if (std::is_integral>::value) - { - if (_dotProductFunction == nullptr) - { - _dotProductFunction = GetDotProductIntFunction(); - } - return _dotProductFunction; - } - else if (std::is_floating_point>::value) - { - if (_dotProductFunctionFloat == nullptr) - { - _dotProductFunctionFloat = GetDotProductFloatFunction(); - } - return _dotProductFunctionFloat; - } - else - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); - } - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/IRVectorUtilities.tcc b/libraries/emitters/tcc/IRVectorUtilities.tcc deleted file mode 100644 index bd5d34cd5..000000000 --- a/libraries/emitters/tcc/IRVectorUtilities.tcc +++ /dev/null @@ -1,81 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IRVectorUtilities.tcc (emitters) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - template > - LLVMValue FillVector(IRFunctionEmitter& function, llvm::VectorType* type, ValueType elementValue) - { - return llvm::ConstantFP::get(type, elementValue); - } - - template > - LLVMValue FillVector(IRFunctionEmitter& function, llvm::VectorType* type, ValueType elementValue) - { - return llvm::ConstantInt::get(type, elementValue, true); - } - - // Emit explicit vectorized code to compute the sum of all the elements in a vector. - // Hopefully, the vecorizing optimizer will take care of this when vecorizing simple - // loops to sum up values, but for other operations we may want to do it ourselves. - // - // Runs in logN time by recursively splitting the vector in half and summing the halves. 
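
> For reference while reading the IR version below: the same halving reduction written against a `std::vector`, which is what the emitted `shufflevector` sequence computes. `HorizontalSumReference` is a name invented here, not part of the emitters library; like the IR code, it assumes a power-of-2 length.

```cpp
#include <cassert>
#include <iostream>
#include <vector>

// Repeatedly add the upper half of the vector into the lower half until one
// element remains -- the scalar analogue of the shufflevector halving below.
template <typename ValueType>
ValueType HorizontalSumReference(std::vector<ValueType> v)
{
    assert(!v.empty() && (v.size() & (v.size() - 1)) == 0); // size must be a power of 2
    for (size_t half = v.size() / 2; half > 0; half /= 2)
    {
        for (size_t i = 0; i < half; ++i)
        {
            v[i] += v[half + i]; // pairwise add, like adding the two shuffled halves
        }
    }
    return v[0]; // for a 1-element input the loop body never runs (the no-op case)
}

int main()
{
    std::vector<int> v{ 1, 2, 3, 4, 5, 6, 7, 8 };
    std::cout << HorizontalSumReference(v) << "\n"; // prints 36
    return 0;
}
```
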
- // Example: - // <1, 2, 3, 4, 5, 6, 7, 8> --> <1, 2, 3, 4> + <5, 6, 7, 8> ( == <6, 8, 10, 12> ) - // <6, 8, 10, 12> --> <6, 8> + <10, 12> ( == <16, 20> ) - // <16, 20> --> 16 + 20 ( == 36 ) - template - LLVMValue HorizontalVectorSum(IRFunctionEmitter& function, LLVMValue vectorValue) - { - LLVMType type = vectorValue->getType(); - - // Allow calling HorizontalVectorSum to be a no-op on a scalar - if (!type->isVectorTy()) - { - return vectorValue; - } - - llvm::VectorType* vecType = llvm::cast(type); - assert(vecType != nullptr); - - int vectorSize = vecType->getNumElements(); - IREmitter& emitter = function.GetEmitter(); - - // Take care of the edge case of 1-element vectors - if (vectorSize == 1) - { - return emitter.GetIRBuilder().CreateExtractElement(vectorValue, static_cast(0)); - } - - // Repeatedly split the vector into two halves, and add the two halves together - auto undef = llvm::UndefValue::get(type); // This undef is to tell LLVM we don't care what goes in the second operand of the shufflevector instruction - while (vectorSize > 2) - { - assert(vectorSize % 2 == 0); // vectorSize must be a power of 2 - std::vector elementIndices1; - std::vector elementIndices2; - for (int index = 0; index < vectorSize / 2; ++index) - { - elementIndices1.push_back(index); // Collect indices [0, vectorSize/2) - elementIndices2.push_back((vectorSize / 2) + index); // Collect indices [vectorSize/2, vectorSize) - } - auto half1 = emitter.GetIRBuilder().CreateShuffleVector(vectorValue, undef, elementIndices1); // Extract elements [0, vectorSize/2) - auto half2 = emitter.GetIRBuilder().CreateShuffleVector(vectorValue, undef, elementIndices2); // Extract elements [vectorSize/2, vectorSize) - vectorValue = function.Operator(emitters::GetAddForValueType(), half1, half2); - vectorSize /= 2; - } - - assert(vectorSize == 2); - auto half1 = emitter.GetIRBuilder().CreateExtractElement(vectorValue, static_cast(0)); - auto half2 = emitter.GetIRBuilder().CreateExtractElement(vectorValue, static_cast(1)); - return function.Operator(emitters::GetAddForValueType(), half1, half2); - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/ScalarVariable.tcc b/libraries/emitters/tcc/ScalarVariable.tcc deleted file mode 100644 index fb8eb5527..000000000 --- a/libraries/emitters/tcc/ScalarVariable.tcc +++ /dev/null @@ -1,41 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ScalarVariable.tcc (emitters) -// Authors: Umesh Madan -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - template - ScalarVariable::ScalarVariable(const VariableScope scope, int flags) : - Variable(GetVariableType(), scope, flags) - { - } - - template - LiteralVariable::LiteralVariable(T data) : - ScalarVariable(VariableScope::literal, Variable::VariableFlags::none), - _data(data) - { - } - - template - InitializedScalarVariable::InitializedScalarVariable(const VariableScope scope, T data, bool isMutable) : - ScalarVariable(scope, isMutable ? 
(Variable::VariableFlags::isMutable | Variable::VariableFlags::hasInitValue) : Variable::VariableFlags::hasInitValue), - _data(data) - { - } - - template - VectorElementVariable::VectorElementVariable(Variable& src, int offset) : - ScalarVariable(VariableScope::local, Variable::VariableFlags::isVectorRef), - _src(src), - _offset(offset) - { - } -} // namespace emitters -} // namespace ell \ No newline at end of file diff --git a/libraries/emitters/tcc/SymbolTable.tcc b/libraries/emitters/tcc/SymbolTable.tcc deleted file mode 100644 index d052f61cf..000000000 --- a/libraries/emitters/tcc/SymbolTable.tcc +++ /dev/null @@ -1,79 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SymbolTable.tcc (emitters) -// Authors: Umesh Madan -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - template - SymbolTable::SymbolTable(std::initializer_list values) : - _map(values) - { - } - - template - ValueType SymbolTable::Get(const std::string& name) const - { - ValueType value = DefaultValue; - auto search = _map.find(name); - if (search != _map.end()) - { - value = search->second; - } - return value; - } - - template - void SymbolTable::Add(const std::string& name, ValueType value) - { - if (_map.count(name) > 0) - { - throw EmitterException(EmitterError::duplicateSymbol); - } - _map[std::move(name)] = std::move(value); - } - - template - std::string SymbolTable::GetUniqueName(const std::string& namePrefix) const - { - // return namePrefix; - int index = 0; - while (true) - { - auto name = namePrefix + "_" + std::to_string(index); - if (!Contains(name)) - { - return name; - } - ++index; - } - } - - template - bool SymbolTable::Contains(const std::string& name) const - { - return (Get(name) != DefaultValue); - } - - template - void SymbolTable::Remove(const std::string& name) - { - auto search = _map.find(name); - if (search != _map.end()) - { - _map.erase(search); - } - } - - template - void SymbolTable::Clear() - { - _map.clear(); - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/Variable.tcc b/libraries/emitters/tcc/Variable.tcc deleted file mode 100644 index 2650a0bc8..000000000 --- a/libraries/emitters/tcc/Variable.tcc +++ /dev/null @@ -1,73 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Variable.tcc (emitters) -// Authors: Umesh Madan -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - - template - class ScalarVariable; - - template - class InitializedScalarVariable; - - template - class VectorVariable; - - template - class InitializedVectorVariable; - - // - // VariableAllocator - // - template - VarType* VariableAllocator::AddVariable(Args&&... 
args) - { - static_assert(std::is_base_of::value, "AddVariable requires you inherit from Variable"); - - auto var = std::make_shared(args...); - VarType* pVar = var.get(); - _variables.push_back(var); - return pVar; - } - - template - Variable* VariableAllocator::AddScalarVariable(VariableScope scope) - { - return AddVariable>(scope); - } - - template - Variable* VariableAllocator::AddScalarVariable(VariableScope scope, ElementType value) - { - return AddVariable>(scope, value); - } - - template - Variable* VariableAllocator::AddVectorVariable(VariableScope scope, int size) - { - return AddVariable>(scope, size); - } - - /// Add a vector, with all elements initialized to a given value - template - Variable* VariableAllocator::AddVectorVariable(VariableScope scope, int size, ElementType value) - { - std::vector data(size, value); - return AddVariable>(scope, data); - } - - /// Add a vector, initialized to a given vector - template - Variable* VariableAllocator::AddVectorVariable(VariableScope scope, const std::vector& data) - { - return AddVariable>(scope, data); - } -} // namespace emitters -} // namespace ell diff --git a/libraries/emitters/tcc/VectorVariable.tcc b/libraries/emitters/tcc/VectorVariable.tcc deleted file mode 100644 index c587010eb..000000000 --- a/libraries/emitters/tcc/VectorVariable.tcc +++ /dev/null @@ -1,54 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: VectorVariable.tcc (emitters) -// Authors: Umesh Madan -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace emitters -{ - // - // VectorVariable - // - template - VectorVariable::VectorVariable(const VariableScope scope, const size_t size, int flags) : - Variable(GetVariableType(), scope, flags), - _size(size) - { - } - - // - // InitializedVectorVariable - // - template - InitializedVectorVariable::InitializedVectorVariable(const VariableScope scope, const std::vector& data, int flags) : - VectorVariable(scope, data.size(), flags | Variable::VariableFlags::hasInitValue) - { - _initialData = VariableValueType::ToVariableVector(data); - } - - template - InitializedVectorVariable::InitializedVectorVariable(const VariableScope scope, size_t size, int flags) : - VectorVariable(scope, size, flags | Variable::VariableFlags::hasInitValue) - { - T defValue = GetDefaultValue(); - for (size_t i = 0; i < size; ++i) - { - _initialData.emplace_back(defValue); - } - } - - // - // LiteralVectorVariable - // - template - LiteralVectorVariable::LiteralVectorVariable(const std::vector& data) : - VectorVariable(VariableScope::literal, data.size(), Variable::VariableFlags::none) - { - _data = VariableValueType::ToVariableVector(data); - } -} // namespace emitters -} // namespace ell diff --git a/libraries/evaluators/CMakeLists.txt b/libraries/evaluators/CMakeLists.txt index 658feaece..9e7df2225 100644 --- a/libraries/evaluators/CMakeLists.txt +++ b/libraries/evaluators/CMakeLists.txt @@ -13,15 +13,10 @@ set (include include/AUCAggregator.h include/IncrementalEvaluator.h include/LossAggregator.h) -set (tcc tcc/Evaluator.tcc - tcc/IncrementalEvaluator.tcc - tcc/LossAggregator.tcc) - source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) -add_library(${library_name} ${src} ${include} ${tcc}) +add_library(${library_name} ${src} ${include}) target_include_directories(${library_name} PRIVATE 
include ${ELL_LIBRARIES_DIR}) target_link_libraries(${library_name} data) diff --git a/libraries/evaluators/include/Evaluator.h b/libraries/evaluators/include/Evaluator.h index aa083e3fb..6178f66b1 100644 --- a/libraries/evaluators/include/Evaluator.h +++ b/libraries/evaluators/include/Evaluator.h @@ -175,4 +175,207 @@ namespace evaluators } // namespace evaluators } // namespace ell -#include "../tcc/Evaluator.tcc" +#pragma region implementation + +namespace ell +{ +namespace evaluators +{ + template + Evaluator::Evaluator(const data::AnyDataset& anyDataset, const EvaluatorParameters& evaluatorParameters, AggregatorTypes... aggregators) : + _dataset(anyDataset), + _evaluatorParameters(evaluatorParameters), + _aggregatorTuple(std::make_tuple(aggregators...)) + { + static_assert(sizeof...(AggregatorTypes) > 0, "Evaluator must contains at least one aggregator"); + + if (_evaluatorParameters.addZeroEvaluation) + { + EvaluateZero(); + } + } + + template + void Evaluator::Evaluate(const PredictorType& predictor) + { + ++_evaluateCounter; + if (_evaluateCounter % _evaluatorParameters.evaluationFrequency != 0) + { + return; + } + + auto iterator = _dataset.GetExampleReferenceIterator(); + + while (iterator.IsValid()) + { + const auto& example = iterator.Get(); + + double weight = example.GetMetadata().weight; + double label = example.GetMetadata().label; + double prediction = predictor.Predict(example.GetDataVector()); + + DispatchUpdate(prediction, label, weight, std::make_index_sequence()); + iterator.Next(); + } + Aggregate(std::make_index_sequence()); + } + + template + double Evaluator::GetGoodness() const + { + if (_values.size() == 0) + { + return 0.0; + } + return _values.back()[0][0]; + } + + template + void PrintVector(std::ostream& os, const std::vector& v) + { + if (v.size() == 0) return; + + os << v[0]; + for (size_t j = 1; j < v.size(); ++j) + { + os << '\t' << v[j]; + } + } + + template + std::vector FlattenJaggedVector(const std::vector>& v) + { + std::vector concat; + auto iter = v.cbegin(); + auto end = v.end(); + while (iter != end) + { + concat.insert(concat.end(), iter->cbegin(), iter->cend()); + ++iter; + } + return concat; + } + + template + void Evaluator::Print(std::ostream& os) const + { + auto originalPrecision = os.precision(6); + auto originalFlags = os.setf(std::ios::fixed); + + PrintVector(os, FlattenJaggedVector(GetValueNames())); + + for (const auto& values : _values) + { + os << std::endl; + PrintVector(os, FlattenJaggedVector(values)); + } + + os.setf(originalFlags); + os.precision(originalPrecision); + } + + template + void Evaluator::EvaluateZero() + { + auto iterator = _dataset.GetExampleIterator(); + + while (iterator.IsValid()) + { + const auto& example = iterator.Get(); + + double weight = example.GetMetadata().weight; + double label = example.GetMetadata().label; + + DispatchUpdate(0.0, label, weight, std::make_index_sequence()); + iterator.Next(); + } + Aggregate(std::make_index_sequence()); + } + + template + template + Evaluator::ElementUpdater::ElementUpdater(AggregatorT& aggregator, const ElementUpdaterParameters& params) : + _params(params), + _aggregator(aggregator) + { + } + + template + template + void Evaluator::ElementUpdater::operator()() + { + _aggregator.Update(_params.prediction, _params.label, _params.weight); + } + + template + template + Evaluator::ElementResetter::ElementResetter(AggregatorT& aggregator) : + _aggregator(aggregator) + { + } + + template + template + void Evaluator::ElementResetter::operator()() + { + _aggregator.Reset(); 
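
> Aside on the `ElementUpdater`/`ElementResetter` machinery in this region: it exists to call `Update` (or `Reset`) on every element of the aggregator tuple without the lambda-in-pack-expansion construct that, per the comments below, trips a GCC bug. On a C++17 compiler the same per-element dispatch can be written with `std::apply` and a fold expression. A self-contained sketch under that assumption (the toy aggregators and the free `DispatchUpdate` are invented for illustration and are not the library's API):

```cpp
#include <iostream>
#include <tuple>

// Two toy aggregators exposing the Update interface the evaluator expects
struct SumAggregator
{
    double sum = 0;
    void Update(double prediction, double label, double weight) { sum += weight * (prediction - label); }
};

struct CountAggregator
{
    int count = 0;
    void Update(double, double, double) { ++count; }
};

// std::apply unpacks the tuple and the comma fold calls Update on each element,
// left to right -- the effect the index_sequence + functor dance achieves.
template <typename... AggregatorTypes>
void DispatchUpdate(std::tuple<AggregatorTypes...>& aggregators, double prediction, double label, double weight)
{
    std::apply([&](auto&... agg) { (agg.Update(prediction, label, weight), ...); }, aggregators);
}

int main()
{
    std::tuple<SumAggregator, CountAggregator> aggregators;
    DispatchUpdate(aggregators, 0.9, 1.0, 2.0);
    DispatchUpdate(aggregators, 0.2, 0.0, 1.0);
    std::cout << std::get<0>(aggregators).sum << " " << std::get<1>(aggregators).count << "\n"; // 0 2
    return 0;
}
```
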
+ } + + template + template + auto Evaluator::GetElementUpdateFunction(const ElementUpdaterParameters& params) -> ElementUpdater> + { + return { std::get(_aggregatorTuple), params }; + } + + template + template + auto Evaluator::GetElementResetFunction() -> ElementResetter> + { + return { std::get(_aggregatorTuple) }; + } + + template + template + void Evaluator::DispatchUpdate(double prediction, double label, double weight, std::index_sequence) + { + // Call (X.Update(), 0) for each X in _aggregatorTuple + ElementUpdaterParameters params{ prediction, label, weight }; + utilities::InOrderFunctionEvaluator(GetElementUpdateFunction(params)...); + // [this, prediction, label, weight]() { std::get(_aggregatorTuple).Update(prediction, label, weight); }...); // GCC bug prevents compilation + } + + template + template + void Evaluator::Aggregate(std::index_sequence) + { + // Call X.GetResult() for each X in _aggregatorTuple + _values.push_back({ std::get(_aggregatorTuple).GetResult()... }); + + // Call X.Reset() for each X in _aggregatorTuple + utilities::InOrderFunctionEvaluator(GetElementResetFunction()...); + // utilities::InOrderFunctionEvaluator([this]() { std::get(_aggregatorTuple).Reset(); }...); // GCC bug prevents compilation + } + + template + std::vector> Evaluator::GetValueNames() const + { + return DispatchGetValueNames(std::make_index_sequence()); + } + + template + template + std::vector> Evaluator::DispatchGetValueNames(std::index_sequence) const + { + return { std::get(_aggregatorTuple).GetValueNames()... }; + } + + template + std::shared_ptr> MakeEvaluator(const data::AnyDataset& anyDataset, const EvaluatorParameters& evaluatorParameters, AggregatorTypes... aggregators) + { + return std::make_unique>(anyDataset, evaluatorParameters, aggregators...); + } +} // namespace evaluators +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/evaluators/include/IncrementalEvaluator.h b/libraries/evaluators/include/IncrementalEvaluator.h index 4640b4368..b564dc739 100644 --- a/libraries/evaluators/include/IncrementalEvaluator.h +++ b/libraries/evaluators/include/IncrementalEvaluator.h @@ -98,4 +98,68 @@ namespace evaluators } // namespace evaluators } // namespace ell -#include "../tcc/IncrementalEvaluator.tcc" +#pragma region implementation + +namespace ell +{ +namespace evaluators +{ + template + IncrementalEvaluator::IncrementalEvaluator(const data::AnyDataset& anyDataset, const EvaluatorParameters& evaluatorParameters, AggregatorTypes... aggregators) : + Evaluator(anyDataset, evaluatorParameters, aggregators...) + { + _predictions.resize(BaseClassType::_dataset.NumExamples()); + } + + template + void IncrementalEvaluator::IncrementalEvaluate(const BasePredictorType& basePredictor, double basePredictorWeight, double evaluationRescale) + { + ++BaseClassType::_evaluateCounter; + bool evaluate = BaseClassType::_evaluateCounter % BaseClassType::_evaluatorParameters.evaluationFrequency == 0 ? 
true : false; + + auto iterator = BaseClassType::_dataset.GetExampleIterator(); + size_t index = 0; + + while (iterator.IsValid()) + { + const auto& example = iterator.Get(); + + double exampleWeight = example.GetMetadata().weight; + double label = example.GetMetadata().label; + _predictions[index] += basePredictorWeight * basePredictor.Predict(example.GetDataVector()); + + if (evaluate) + { + BaseClassType::DispatchUpdate(_predictions[index] * evaluationRescale, label, exampleWeight, std::make_index_sequence()); + } + + iterator.Next(); + ++index; + } + if (evaluate) + { + BaseClassType::Aggregate(std::make_index_sequence()); + } + } + + template + double IncrementalEvaluator::GetGoodness() const + { + return BaseClassType::GetGoodness(); + } + + template + void IncrementalEvaluator::Print(std::ostream& os) const + { + BaseClassType::Print(os); + } + + template + std::shared_ptr> MakeIncrementalEvaluator(data::ExampleIterator exampleIterator, const EvaluatorParameters& evaluatorParameters, AggregatorTypes... aggregators) + { + return std::make_unique>(exampleIterator, evaluatorParameters, aggregators...); + } +} // namespace evaluators +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/evaluators/include/LossAggregator.h b/libraries/evaluators/include/LossAggregator.h index 52114ac7d..f1b34964c 100644 --- a/libraries/evaluators/include/LossAggregator.h +++ b/libraries/evaluators/include/LossAggregator.h @@ -58,4 +58,52 @@ namespace evaluators } // namespace evaluators } // namespace ell -#include "../tcc/LossAggregator.tcc" +#pragma region implementation + +namespace ell +{ +namespace evaluators +{ + template + LossAggregator::LossAggregator(LossFunctionType lossFunction) : + _lossFunction(std::move(lossFunction)) + { + } + + template + void LossAggregator::Update(double prediction, double label, double weight) + { + double loss = _lossFunction(prediction, label); + _sumWeights += weight; + _sumWeightedLosses += weight * loss; + } + + template + std::vector LossAggregator::GetResult() const + { + double meanLoss = _sumWeights == 0.0 ? 0.0 : _sumWeightedLosses / _sumWeights; + return { meanLoss }; + } + + template + void LossAggregator::Reset() + { + _sumWeights = 0.0; + _sumWeightedLosses = 0.0; + } + + template + std::vector LossAggregator::GetValueNames() const + { + return { "MeanLoss" }; + } + + template + LossAggregator MakeLossAggregator(LossFunctionType lossFunction) + { + return LossAggregator(std::move(lossFunction)); + } +} // namespace evaluators +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/evaluators/tcc/Evaluator.tcc b/libraries/evaluators/tcc/Evaluator.tcc deleted file mode 100644 index de08c9d4a..000000000 --- a/libraries/evaluators/tcc/Evaluator.tcc +++ /dev/null @@ -1,208 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Evaluator.tcc (evaluators) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace evaluators -{ - template - Evaluator::Evaluator(const data::AnyDataset& anyDataset, const EvaluatorParameters& evaluatorParameters, AggregatorTypes... 
aggregators) : - _dataset(anyDataset), - _evaluatorParameters(evaluatorParameters), - _aggregatorTuple(std::make_tuple(aggregators...)) - { - static_assert(sizeof...(AggregatorTypes) > 0, "Evaluator must contains at least one aggregator"); - - if (_evaluatorParameters.addZeroEvaluation) - { - EvaluateZero(); - } - } - - template - void Evaluator::Evaluate(const PredictorType& predictor) - { - ++_evaluateCounter; - if (_evaluateCounter % _evaluatorParameters.evaluationFrequency != 0) - { - return; - } - - auto iterator = _dataset.GetExampleReferenceIterator(); - - while (iterator.IsValid()) - { - const auto& example = iterator.Get(); - - double weight = example.GetMetadata().weight; - double label = example.GetMetadata().label; - double prediction = predictor.Predict(example.GetDataVector()); - - DispatchUpdate(prediction, label, weight, std::make_index_sequence()); - iterator.Next(); - } - Aggregate(std::make_index_sequence()); - } - - template - double Evaluator::GetGoodness() const - { - if (_values.size() == 0) - { - return 0.0; - } - return _values.back()[0][0]; - } - - template - void PrintVector(std::ostream& os, const std::vector& v) - { - if (v.size() == 0) return; - - os << v[0]; - for (size_t j = 1; j < v.size(); ++j) - { - os << '\t' << v[j]; - } - } - - template - std::vector FlattenJaggedVector(const std::vector>& v) - { - std::vector concat; - auto iter = v.cbegin(); - auto end = v.end(); - while (iter != end) - { - concat.insert(concat.end(), iter->cbegin(), iter->cend()); - ++iter; - } - return concat; - } - - template - void Evaluator::Print(std::ostream& os) const - { - auto originalPrecision = os.precision(6); - auto originalFlags = os.setf(std::ios::fixed); - - PrintVector(os, FlattenJaggedVector(GetValueNames())); - - for (const auto& values : _values) - { - os << std::endl; - PrintVector(os, FlattenJaggedVector(values)); - } - - os.setf(originalFlags); - os.precision(originalPrecision); - } - - template - void Evaluator::EvaluateZero() - { - auto iterator = _dataset.GetExampleIterator(); - - while (iterator.IsValid()) - { - const auto& example = iterator.Get(); - - double weight = example.GetMetadata().weight; - double label = example.GetMetadata().label; - - DispatchUpdate(0.0, label, weight, std::make_index_sequence()); - iterator.Next(); - } - Aggregate(std::make_index_sequence()); - } - - template - template - Evaluator::ElementUpdater::ElementUpdater(AggregatorT& aggregator, const ElementUpdaterParameters& params) : - _params(params), - _aggregator(aggregator) - { - } - - template - template - void Evaluator::ElementUpdater::operator()() - { - _aggregator.Update(_params.prediction, _params.label, _params.weight); - } - - template - template - Evaluator::ElementResetter::ElementResetter(AggregatorT& aggregator) : - _aggregator(aggregator) - { - } - - template - template - void Evaluator::ElementResetter::operator()() - { - _aggregator.Reset(); - } - - template - template - auto Evaluator::GetElementUpdateFunction(const ElementUpdaterParameters& params) -> ElementUpdater> - { - return { std::get(_aggregatorTuple), params }; - } - - template - template - auto Evaluator::GetElementResetFunction() -> ElementResetter> - { - return { std::get(_aggregatorTuple) }; - } - - template - template - void Evaluator::DispatchUpdate(double prediction, double label, double weight, std::index_sequence) - { - // Call (X.Update(), 0) for each X in _aggregatorTuple - ElementUpdaterParameters params{ prediction, label, weight }; - 
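
> The `utilities::InOrderFunctionEvaluator` call below (and in the header copy above) invokes each generated functor in sequence; the commented-out lambda form is the GCC-bugged alternative. A sketch of the usual pre-C++17 idiom for such a helper, using a braced initializer list whose elements are evaluated left to right (`InOrderEvaluate` is a hypothetical stand-in; the real helper lives in ELL's utilities library and is not reproduced here):

```cpp
#include <initializer_list>
#include <iostream>

// Expanding the pack inside an initializer list guarantees left-to-right
// evaluation; (f(), 0) turns each void call into an int list element.
template <typename... FunctionTypes>
void InOrderEvaluate(FunctionTypes&&... functions)
{
    (void)std::initializer_list<int>{ (functions(), 0)... };
}

int main()
{
    InOrderEvaluate([] { std::cout << "first "; },
                    [] { std::cout << "second "; },
                    [] { std::cout << "third\n"; });
    return 0;
}
```
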
utilities::InOrderFunctionEvaluator(GetElementUpdateFunction(params)...); - // [this, prediction, label, weight]() { std::get(_aggregatorTuple).Update(prediction, label, weight); }...); // GCC bug prevents compilation - } - - template - template - void Evaluator::Aggregate(std::index_sequence) - { - // Call X.GetResult() for each X in _aggregatorTuple - _values.push_back({ std::get(_aggregatorTuple).GetResult()... }); - - // Call X.Reset() for each X in _aggregatorTuple - utilities::InOrderFunctionEvaluator(GetElementResetFunction()...); - // utilities::InOrderFunctionEvaluator([this]() { std::get(_aggregatorTuple).Reset(); }...); // GCC bug prevents compilation - } - - template - std::vector> Evaluator::GetValueNames() const - { - return DispatchGetValueNames(std::make_index_sequence()); - } - - template - template - std::vector> Evaluator::DispatchGetValueNames(std::index_sequence) const - { - return { std::get(_aggregatorTuple).GetValueNames()... }; - } - - template - std::shared_ptr> MakeEvaluator(const data::AnyDataset& anyDataset, const EvaluatorParameters& evaluatorParameters, AggregatorTypes... aggregators) - { - return std::make_unique>(anyDataset, evaluatorParameters, aggregators...); - } -} // namespace evaluators -} // namespace ell diff --git a/libraries/evaluators/tcc/IncrementalEvaluator.tcc b/libraries/evaluators/tcc/IncrementalEvaluator.tcc deleted file mode 100644 index 506d7b986..000000000 --- a/libraries/evaluators/tcc/IncrementalEvaluator.tcc +++ /dev/null @@ -1,69 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IncrementalEvaluator.tcc (evaluators) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace evaluators -{ - template - IncrementalEvaluator::IncrementalEvaluator(const data::AnyDataset& anyDataset, const EvaluatorParameters& evaluatorParameters, AggregatorTypes... aggregators) : - Evaluator(anyDataset, evaluatorParameters, aggregators...) - { - _predictions.resize(BaseClassType::_dataset.NumExamples()); - } - - template - void IncrementalEvaluator::IncrementalEvaluate(const BasePredictorType& basePredictor, double basePredictorWeight, double evaluationRescale) - { - ++BaseClassType::_evaluateCounter; - bool evaluate = BaseClassType::_evaluateCounter % BaseClassType::_evaluatorParameters.evaluationFrequency == 0 ? true : false; - - auto iterator = BaseClassType::_dataset.GetExampleIterator(); - size_t index = 0; - - while (iterator.IsValid()) - { - const auto& example = iterator.Get(); - - double exampleWeight = example.GetMetadata().weight; - double label = example.GetMetadata().label; - _predictions[index] += basePredictorWeight * basePredictor.Predict(example.GetDataVector()); - - if (evaluate) - { - BaseClassType::DispatchUpdate(_predictions[index] * evaluationRescale, label, exampleWeight, std::make_index_sequence()); - } - - iterator.Next(); - ++index; - } - if (evaluate) - { - BaseClassType::Aggregate(std::make_index_sequence()); - } - } - - template - double IncrementalEvaluator::GetGoodness() const - { - return BaseClassType::GetGoodness(); - } - - template - void IncrementalEvaluator::Print(std::ostream& os) const - { - BaseClassType::Print(os); - } - - template - std::shared_ptr> MakeIncrementalEvaluator(data::ExampleIterator exampleIterator, const EvaluatorParameters& evaluatorParameters, AggregatorTypes... 
aggregators) - { - return std::make_unique>(exampleIterator, evaluatorParameters, aggregators...); - } -} // namespace evaluators -} // namespace ell diff --git a/libraries/evaluators/tcc/LossAggregator.tcc b/libraries/evaluators/tcc/LossAggregator.tcc deleted file mode 100644 index ab62b5ee8..000000000 --- a/libraries/evaluators/tcc/LossAggregator.tcc +++ /dev/null @@ -1,53 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: LossAggregator.tcc (evaluators) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace evaluators -{ - template - LossAggregator::LossAggregator(LossFunctionType lossFunction) : - _lossFunction(std::move(lossFunction)) - { - } - - template - void LossAggregator::Update(double prediction, double label, double weight) - { - double loss = _lossFunction(prediction, label); - _sumWeights += weight; - _sumWeightedLosses += weight * loss; - } - - template - std::vector LossAggregator::GetResult() const - { - double meanLoss = _sumWeights == 0.0 ? 0.0 : _sumWeightedLosses / _sumWeights; - return { meanLoss }; - } - - template - void LossAggregator::Reset() - { - _sumWeights = 0.0; - _sumWeightedLosses = 0.0; - } - - template - std::vector LossAggregator::GetValueNames() const - { - return { "MeanLoss" }; - } - - template - LossAggregator MakeLossAggregator(LossFunctionType lossFunction) - { - return LossAggregator(std::move(lossFunction)); - } -} // namespace evaluators -} // namespace ell diff --git a/libraries/math/CMakeLists.txt b/libraries/math/CMakeLists.txt index b06dd14e7..41fabdfc0 100644 --- a/libraries/math/CMakeLists.txt +++ b/libraries/math/CMakeLists.txt @@ -25,22 +25,13 @@ set(include include/BlasWrapper.h include/VectorOperations.h ) -set(tcc tcc/Matrix.tcc - tcc/MatrixOperations.tcc - tcc/Tensor.tcc - tcc/TensorOperations.tcc - tcc/Vector.tcc - tcc/VectorOperations.tcc -) - set(doc doc/README.md) source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) source_group("doc" FILES ${doc}) -add_library(${library_name} ${src} ${include} ${tcc} ${doc}) +add_library(${library_name} ${src} ${include} ${doc}) target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR}) target_include_directories(${library_name} SYSTEM PUBLIC ${BLAS_INCLUDE_DIRS}) target_link_libraries(${library_name} utilities ${BLAS_LIBS}) @@ -64,16 +55,10 @@ set(test_include test/include/math_profile.h test/include/Tensor_test.h test/include/Vector_test.h) -set(test_tcc test/tcc/math_profile.tcc - test/tcc/Matrix_test.tcc - test/tcc/Tensor_test.tcc - test/tcc/Vector_test.tcc) - source_group("src" FILES ${test_src}) source_group("include" FILES ${test_include}) -source_group("tcc" FILES ${test_tcc}) -add_executable(${test_name} ${test_src} ${test_include} ${test_tcc} ${include}) +add_executable(${test_name} ${test_src} ${test_include} ${include}) target_include_directories(${test_name} PRIVATE test/include ${ELL_LIBRARIES_DIR}) target_link_libraries(${test_name} math testing) copy_shared_libraries(${test_name}) @@ -93,13 +78,10 @@ set(profile_src test/src/math_profile_main.cpp) set(profile_include test/include/math_profile.h) -set(profile_tcc test/tcc/math_profile.tcc) - source_group("src" FILES ${profile_src}) source_group("include" FILES ${profile_include}) -source_group("tcc" FILES ${profile_tcc}) 
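
> To make the `LossAggregator` contract above concrete: it accumulates a weighted sum of losses and reports the weighted mean, guarding the zero-weight case. A minimal stand-in with a hard-coded squared loss (`SquaredLossAggregator` is a name invented here; the real class is templated on the loss function):

```cpp
#include <iostream>

// Mirrors the deleted LossAggregator logic: weighted running sums plus a
// divide-by-zero guard when no examples have been seen.
struct SquaredLossAggregator
{
    double sumWeights = 0;
    double sumWeightedLosses = 0;

    void Update(double prediction, double label, double weight)
    {
        double diff = prediction - label;
        sumWeights += weight;
        sumWeightedLosses += weight * diff * diff;
    }

    double MeanLoss() const { return sumWeights == 0 ? 0 : sumWeightedLosses / sumWeights; }
};

int main()
{
    SquaredLossAggregator aggregator;
    aggregator.Update(0.8, 1.0, 1.0); // loss 0.04, weight 1
    aggregator.Update(0.5, 0.0, 3.0); // loss 0.25, weight 3
    std::cout << aggregator.MeanLoss() << "\n"; // (0.04 + 0.75) / 4 = 0.1975
    return 0;
}
```
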
-add_executable(${profile_name} ${profile_src} ${profile_include} ${profile_tcc} ${include}) +add_executable(${profile_name} ${profile_src} ${profile_include} ${include}) target_include_directories(${profile_name} PRIVATE test/include ${ELL_LIBRARIES_DIR}) target_link_libraries(${profile_name} math testing) copy_shared_libraries(${profile_name}) diff --git a/libraries/math/include/Matrix.h b/libraries/math/include/Matrix.h index b08a5e7f3..c8ea92dd0 100644 --- a/libraries/math/include/Matrix.h +++ b/libraries/math/include/Matrix.h @@ -649,4 +649,503 @@ namespace math } // namespace math } // namespace ell -#include "../tcc/Matrix.tcc" +#pragma region implementation + +#include "../include/VectorOperations.h" + +#include +#include +#include + +#include + +namespace ell +{ +namespace math +{ + // + // CommonMatrixBase + // + + template + CommonMatrixBase::CommonMatrixBase(const ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : + _pData(pData), + _numRows(numRows), + _numColumns(numColumns), + _increment(increment) + { + } + + template + void CommonMatrixBase::Swap(CommonMatrixBase& other) + { + using std::swap; + swap(_pData, other._pData); + swap(_numRows, other._numRows); + swap(_numColumns, other._numColumns); + swap(_increment, other._increment); + } + + // + // MatrixBase + // + + // Row-major + template + MatrixBase::MatrixBase(const ElementType* pData, size_t numRows, size_t numColumns) : + CommonMatrixBase(pData, numRows, numColumns, numColumns) + { + } + + template + MatrixBase::MatrixBase(const ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : + CommonMatrixBase(pData, numRows, numColumns, increment) + { + } + + template + void MatrixBase::Swap(MatrixBase& other) + { + CommonMatrixBase::Swap(other); + } + + // Column-major + template + MatrixBase::MatrixBase(const ElementType* pData, size_t numRows, size_t numColumns) : + CommonMatrixBase(pData, numRows, numColumns, numRows) + { + } + + template + MatrixBase::MatrixBase(const ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : + CommonMatrixBase(pData, numRows, numColumns, increment) + { + } + + template + void MatrixBase::Swap(MatrixBase& other) + { + CommonMatrixBase::Swap(other); + } + + // + // ConstMatrixReference + // + template + ConstMatrixReference::ConstMatrixReference(const ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : + MatrixBase(pData, numRows, numColumns, increment) + { + } + + template + ConstMatrixReference::ConstMatrixReference(const ElementType* pData, size_t numRows, size_t numColumns) : + MatrixBase(pData, numRows, numColumns) + { + } + + template + ElementType ConstMatrixReference::operator()(size_t rowIndex, size_t columnIndex) const + { + using namespace std::string_literals; + DEBUG_THROW(rowIndex >= this->NumRows() || columnIndex >= this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "("s + std::to_string(rowIndex) + ", " + std::to_string(columnIndex) + ") exceeds matrix dimensions (" + std::to_string(this->NumRows()) + " x " + std::to_string(this->NumColumns()) + ".")); + + return GetConstDataPointer()[rowIndex * this->GetRowIncrement() + columnIndex * this->GetColumnIncrement()]; + } + + template + std::vector ConstMatrixReference::ToArray() const + { + std::vector v(this->Size()); + auto vIterator = v.begin(); + for (size_t i = 0; i < this->GetMinorSize(); ++i) + { + auto pIntervalData = GetMajorVectorBegin(i); + std::copy(pIntervalData, pIntervalData + 
this->GetMajorSize(), vIterator); + vIterator += this->GetMajorSize(); + } + return v; + } + + template + void ConstMatrixReference::Swap(ConstMatrixReference& other) + { + MatrixBase::Swap(other); + } + + template + bool ConstMatrixReference::IsEqual(ConstMatrixReference other, ElementType tolerance) const + { + if (this->NumRows() != other.NumRows() || this->NumColumns() != other.NumColumns()) + { + return false; + } + + for (size_t i = 0; i < this->GetMinorSize(); ++i) + { + if (!GetMajorVector(i).IsEqual(other.GetMajorVector(i), tolerance)) + { + return false; + } + } + return true; + } + + template + bool ConstMatrixReference::IsEqual(ConstMatrixReference::value> other, ElementType tolerance) const + { + if (this->NumRows() != other.NumRows() || this->NumColumns() != other.NumColumns()) + { + return false; + } + + for (size_t i = 0; i < this->NumRows(); ++i) + { + if (!GetRow(i).IsEqual(other.GetRow(i), tolerance)) + { + return false; + } + } + return true; + } + + template + bool ConstMatrixReference::operator==(const ConstMatrixReference& other) const + { + return IsEqual(other); + } + + template + bool ConstMatrixReference::operator==(const ConstMatrixReference::value>& other) const + { + return IsEqual(other); + } + + template + template + bool ConstMatrixReference::operator!=(const ConstMatrixReference& other) + { + return !(*this == other); + } + + template + ConstMatrixReference ConstMatrixReference::GetSubMatrix(size_t firstRow, size_t firstColumn, size_t numRows, size_t numColumns) const + { + DEBUG_THROW(firstRow + numRows > this->NumRows() || firstColumn + numColumns > this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "block exceeds matrix dimensions.")); + + return ConstMatrixReference(GetConstDataPointer() + firstRow * this->GetRowIncrement() + firstColumn * this->GetColumnIncrement(), numRows, numColumns, this->GetIncrement()); + } + + template + ConstColumnVectorReference ConstMatrixReference::GetColumn(size_t index) const + { + DEBUG_THROW(index >= this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "column index exceeds matrix dimensions.")); + + return ConstColumnVectorReference(GetConstDataPointer() + index * this->GetColumnIncrement(), this->NumRows(), this->GetRowIncrement()); + } + + template + ConstRowVectorReference ConstMatrixReference::GetRow(size_t index) const + { + DEBUG_THROW(index >= this->NumRows(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "row index exceeds matrix dimensions.")); + + return ConstRowVectorReference(GetConstDataPointer() + index * this->GetRowIncrement(), this->NumColumns(), this->GetColumnIncrement()); + } + + template + ConstColumnVectorReference ConstMatrixReference::GetDiagonal() const + { + auto size = std::min(this->NumColumns(), this->NumRows()); + return ConstColumnVectorReference(GetConstDataPointer(), size, this->GetIncrement() + 1); + } + + template + ConstColumnVectorReference ConstMatrixReference::ReferenceAsVector() const + { + DEBUG_THROW(!IsContiguous(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Can only flatten a matrix when its memory is contiguous")); + return ConstColumnVectorReference(GetConstDataPointer(), this->NumRows() * this->NumColumns(), 1); + } + + template + auto ConstMatrixReference::Transpose() const -> ConstMatrixReference::value> + { + return ConstMatrixReference::value>(GetConstDataPointer(), this->NumColumns(), this->NumRows(), 
this->GetIncrement()); + } + + template + const ElementType* ConstMatrixReference::GetMajorVectorBegin(size_t index) const + { + return GetConstDataPointer() + index * this->GetIncrement(); + } + + // + // MatrixReference + // + + template + MatrixReference::MatrixReference(ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : + ConstMatrixReference(pData, numRows, numColumns, increment) + { + } + + template + MatrixReference::MatrixReference(ElementType* pData, size_t numRows, size_t numColumns) : + ConstMatrixReference(pData, numRows, numColumns) + { + } + + template + ElementType& MatrixReference::operator()(size_t rowIndex, size_t columnIndex) + { + using namespace std::string_literals; + DEBUG_THROW(rowIndex >= this->NumRows() || columnIndex >= this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "("s + std::to_string(rowIndex) + ", " + std::to_string(columnIndex) + ") exceeds matrix dimensions (" + std::to_string(this->NumRows()) + " x " + std::to_string(this->NumColumns()) + ".")); + + return GetDataPointer()[rowIndex * this->GetRowIncrement() + columnIndex * this->GetColumnIncrement()]; + } + + template + void MatrixReference::CopyFrom(ConstMatrixReference other) + { + if (this->NumRows() != other.NumRows() || this->NumColumns() != other.NumColumns()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Matrix dimensions are not the same."); + } + + for (size_t i = 0; i < other.GetMinorSize(); ++i) + { + GetMajorVector(i).CopyFrom(other.GetMajorVector(i)); + } + } + + template + void MatrixReference::CopyFrom(ConstMatrixReference::value> other) + { + if (this->NumRows() != other.NumRows() || this->NumColumns() != other.NumColumns()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Matrix dimensions are not the same."); + } + + for (size_t i = 0; i < other.NumRows(); ++i) + { + GetRow(i).CopyFrom(other.GetRow(i)); + } + } + + template + void MatrixReference::Swap(MatrixReference& other) + { + ConstMatrixReference::Swap(other); + } + + template + void MatrixReference::Fill(ElementType value) + { + for (size_t i = 0; i < this->GetMinorSize(); ++i) + { + auto vector = GetMajorVector(i); + vector.Fill(value); + } + } + + template + template + void MatrixReference::Generate(GeneratorType generator) + { + for (size_t i = 0; i < this->GetMinorSize(); ++i) + { + GetMajorVector(i).Generate(generator); + } + } + + template + template + void MatrixReference::Transform(TransformationType transformation) + { + for (size_t i = 0; i < this->GetMinorSize(); ++i) + { + TransformUpdate(transformation, GetMajorVector(i)); + } + } + + template + auto MatrixReference::Transpose() -> MatrixReference::value> + { + return MatrixReference::value>(GetDataPointer(), this->NumColumns(), this->NumRows(), this->GetIncrement()); + } + + template + MatrixReference MatrixReference::GetSubMatrix(size_t firstRow, size_t firstColumn, size_t numRows, size_t numColumns) + { + DEBUG_THROW(firstRow + numRows > this->NumRows() || firstColumn + numColumns > this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "block exceeds matrix dimensions.")); + + return MatrixReference(GetDataPointer() + firstRow * this->GetRowIncrement() + firstColumn * this->GetColumnIncrement(), numRows, numColumns, this->GetIncrement()); + } + + template + ColumnVectorReference MatrixReference::GetColumn(size_t index) + { + DEBUG_THROW(index >= this->NumColumns(), 
utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "column index exceeds matrix dimensions.")); + + return ColumnVectorReference(GetDataPointer() + index * this->GetColumnIncrement(), this->NumRows(), this->GetRowIncrement()); + } + + template + RowVectorReference MatrixReference::GetRow(size_t index) + { + DEBUG_THROW(index >= this->NumRows(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "row index exceeds matrix dimensions.")); + + return RowVectorReference(GetDataPointer() + index * this->GetRowIncrement(), this->NumColumns(), this->GetColumnIncrement()); + } + + template + ColumnVectorReference MatrixReference::GetDiagonal() + { + auto size = std::min(this->NumColumns(), this->NumRows()); + return ColumnVectorReference(GetDataPointer(), size, this->GetIncrement() + 1); + } + + template + ColumnVectorReference MatrixReference::ReferenceAsVector() + { + DEBUG_THROW(!IsContiguous(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Can only flatten a matrix when its memory is contiguous")); + return ColumnVectorReference(GetDataPointer(), this->NumRows() * this->NumColumns(), 1); + } + + // + // Matrix + // + + template + Matrix::Matrix(size_t numRows, size_t numColumns) : + MatrixReference(nullptr, numRows, numColumns), + _data(numRows * numColumns) + { + this->_pData = _data.data(); + } + + template + Matrix::Matrix(std::initializer_list> list) : + MatrixReference(nullptr, list.size(), list.begin()->size()), + _data(list.size() * list.begin()->size()) + { + this->_pData = _data.data(); + auto numColumns = list.begin()->size(); + DEBUG_USED(numColumns); + + size_t i = 0; + for (auto rowIter = list.begin(); rowIter < list.end(); ++rowIter) + { + DEBUG_THROW(rowIter->size() != numColumns, utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "incorrect number of elements in initializer list")); + + size_t j = 0; + for (auto elementIter = rowIter->begin(); elementIter < rowIter->end(); ++elementIter) + { + (*this)(i, j) = *elementIter; + ++j; + } + ++i; + } + } + + template + Matrix::Matrix(size_t numRows, size_t numColumns, const std::vector& data) : + MatrixReference(nullptr, numRows, numColumns), + _data(data) + { + this->_pData = _data.data(); + } + + template + Matrix::Matrix(size_t numRows, size_t numColumns, std::vector&& data) : + MatrixReference(nullptr, numRows, numColumns), + _data(std::move(data)) + { + this->_pData = _data.data(); + } + + template + Matrix::Matrix(Matrix&& other) : + MatrixReference(nullptr, other.NumRows(), other.NumColumns()), + _data(std::move(other._data)) + { + this->_pData = _data.data(); + } + + template + Matrix::Matrix(const Matrix& other) : + MatrixReference(nullptr, other.NumRows(), other.NumColumns()), + _data(other._data) + { + this->_pData = _data.data(); + } + + template + Matrix::Matrix(ConstMatrixReference& other) : + MatrixReference(nullptr, other.NumRows(), other.NumColumns()), + _data(other.NumRows() * other.NumColumns()) + { + this->_pData = _data.data(); + for (size_t i = 0; i < this->NumRows(); ++i) + { + for (size_t j = 0; j < this->NumColumns(); ++j) + { + (*this)(i, j) = other(i, j); + } + } + } + + template + Matrix::Matrix(ConstMatrixReference::value> other) : + MatrixReference(nullptr, other.NumRows(), other.NumColumns()), + _data(other.NumRows() * other.NumColumns()) + { + this->_pData = _data.data(); + for (size_t i = 0; i < this->NumRows(); ++i) + { + for (size_t j = 0; j < this->NumColumns(); ++j) + { + (*this)(i, j) = other(i, 
j); + } + } + } + + template + Matrix& Matrix::operator=(Matrix other) + { + Swap(other); + return *this; + } + + template + void Matrix::Swap(Matrix& other) + { + MatrixReference::Swap(other); + std::swap(_data, other._data); + } + + template + void MatrixArchiver::Write(const Matrix& matrix, const std::string& name, utilities::Archiver& archiver) + { + archiver[GetRowsName(name)] << matrix.NumRows(); + archiver[GetColumnsName(name)] << matrix.NumColumns(); + archiver[GetValuesName(name)] << matrix.ToArray(); + } + + template + void MatrixArchiver::Read(Matrix& matrix, const std::string& name, utilities::Unarchiver& archiver) + { + size_t rows = 0; + size_t columns = 0; + std::vector values; + + archiver[GetRowsName(name)] >> rows; + archiver[GetColumnsName(name)] >> columns; + archiver[GetValuesName(name)] >> values; + + Matrix value(rows, columns, std::move(values)); + + matrix = std::move(value); + } +} // namespace math +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/math/include/MatrixOperations.h b/libraries/math/include/MatrixOperations.h index 3c6af3ee6..91d82b3ba 100644 --- a/libraries/math/include/MatrixOperations.h +++ b/libraries/math/include/MatrixOperations.h @@ -423,4 +423,722 @@ namespace math } // namespace math } // namespace ell -#include "../tcc/MatrixOperations.tcc" +#pragma region implementation + +#include "../include/VectorOperations.h" + +#include +#include +#include + +namespace ell +{ +namespace math +{ + template + void Print(ConstMatrixReference M, std::ostream& stream, size_t indent, size_t maxRows, size_t maxElementsPerRow) + { + using namespace logging; + + stream << std::string(indent, ' ') << "{"; + if (M.NumRows() > 0) + { + Print(M.GetRow(0), stream, 1, maxElementsPerRow); + } + + if (M.NumRows() <= maxRows) + { + for (size_t i = 1; i < M.NumRows(); ++i) + { + stream << "," << EOL; + Print(M.GetRow(i), stream, indent + 2, maxElementsPerRow); + } + } + else + { + for (size_t i = 1; i < maxRows - 2; ++i) + { + stream << "," << EOL; + Print(M.GetRow(i), stream, indent + 2, maxElementsPerRow); + } + stream << "," << EOL + << std::string(indent + 2, ' ') << "...," << EOL; + Print(M.GetRow(M.NumRows() - 1), stream, indent + 2, maxElementsPerRow); + } + stream << " }" << EOL; + } + + template + std::ostream& operator<<(std::ostream& stream, ConstMatrixReference M) + { + Print(M, stream); + return stream; + } + + template > + void operator+=(MatrixReference matrix, ScalarElementType scalar) + { + AddUpdate(static_cast(scalar), matrix); + } + + template + void operator+=(MatrixReference matrixB, ConstMatrixReference matrixA) + { + AddUpdate(matrixA, matrixB); + } + + template > + void operator-=(MatrixReference matrix, ScalarElementType scalar) + { + AddUpdate(-static_cast(scalar), matrix); + } + + template + void operator-=(MatrixReference matrixB, ConstMatrixReference matrixA) + { + ScaleAddUpdate(static_cast(-1), matrixA, One(), matrixB); + } + + template > + void operator*=(MatrixReference matrix, ScalarElementType scalar) + { + ScaleUpdate(static_cast(scalar), matrix); + } + + template > + void operator/=(MatrixReference matrix, ScalarElementType scalar) + { + DEBUG_THROW(scalar == 0, utilities::NumericException(utilities::NumericExceptionErrors::divideByZero, "divide by zero")); + + ScaleUpdate(1 / static_cast(scalar), matrix); + } + + template + void AddUpdate(ElementType scalar, MatrixReference matrix) + { + if (scalar == 0) + { + return; + } + if (matrix.IsContiguous()) + { + Internal::VectorOperations::AddUpdate(scalar, 
matrix.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrix.GetMinorSize(); ++i) + { + Internal::VectorOperations::AddUpdate(scalar, matrix.GetMajorVector(i)); + } + } + } + + namespace Internal + { + template + void AddUpdateAsVectors(ConstMatrixReference matrixA, MatrixReference matrixB) + { + if (matrixA.IsContiguous() && matrixB.IsContiguous()) + { + Internal::VectorOperations::AddUpdate(matrixA.ReferenceAsVector(), matrixB.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrixA.GetMinorSize(); ++i) + { + Internal::VectorOperations::AddUpdate(matrixA.GetMajorVector(i), matrixB.GetMajorVector(i)); + } + } + } + + template + void AddUpdateAsVectors(ConstMatrixReference matrixA, MatrixReference::value> matrixB) + { + for (size_t i = 0; i < matrixA.NumRows(); ++i) + { + Internal::VectorOperations::AddUpdate(matrixA.GetRow(i), matrixB.GetRow(i)); + } + } + } // namespace Internal + + template + void AddUpdate(ConstMatrixReference matrixA, MatrixReference matrixB) + { + DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns(), "Incompatible matrix sizes."); + + Internal::AddUpdateAsVectors(matrixA, matrixB); + } + + namespace Internal + { + template + void AddSetAsVectors(ElementType scalar, ConstMatrixReference matrix, MatrixReference output) + { + if (matrix.IsContiguous() && output.IsContiguous()) + { + Internal::VectorOperations::AddSet(scalar, matrix.ReferenceAsVector(), output.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrix.GetMinorSize(); ++i) + { + Internal::VectorOperations::AddSet(scalar, matrix.GetMajorVector(i), output.GetMajorVector(i)); + } + } + } + + template + void AddSetAsVectors(ElementType scalar, ConstMatrixReference matrix, MatrixReference::value> output) + { + for (size_t i = 0; i < matrix.NumRows(); ++i) + { + Internal::VectorOperations::AddSet(scalar, matrix.GetRow(i), output.GetRow(i)); + } + } + + template + void AddSetAsVectors(ConstMatrixReference matrixA, ConstMatrixReference matrixB, MatrixReference output) + { + if (matrixA.IsContiguous() && matrixB.IsContiguous() && output.IsContiguous()) + { + Internal::VectorOperations::AddSet(matrixA.ReferenceAsVector(), matrixB.ReferenceAsVector(), output.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrixA.GetMinorSize(); ++i) + { + Internal::VectorOperations::AddSet(matrixA.GetMajorVector(i), matrixB.GetMajorVector(i), output.GetMajorVector(i)); + } + } + } + + template + void AddSetAsVectors(ConstMatrixReference matrixA, ConstMatrixReference matrixB, MatrixReference::value> output) + { + for (size_t i = 0; i < matrixA.NumRows(); ++i) + { + Internal::VectorOperations::AddSet(matrixA.GetRow(i), matrixB.GetRow(i), output.GetRow(i)); + } + } + + template + void AddSetAsVectors(ConstMatrixReference matrixA, ConstMatrixReference::value> matrixB, MatrixReference output) + { + for (size_t i = 0; i < matrixA.NumRows(); ++i) + { + Internal::VectorOperations::AddSet(matrixA.GetRow(i), matrixB.GetRow(i), output.GetRow(i)); + } + } + } // namespace Internal + + template + void AddSet(ElementType scalar, ConstMatrixReference matrix, MatrixReference output) + { + DEBUG_CHECK_SIZES(matrix.NumRows() != output.NumRows() || matrix.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); + + if (scalar == 0) + { + output.CopyFrom(matrix); + } + else + { + Internal::AddSetAsVectors(scalar, matrix, output); + } + } + + template + void AddSet(ConstMatrixReference matrixA, ConstMatrixReference matrixB, MatrixReference 
output) + { + DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns() || matrixA.NumRows() != output.NumRows() || matrixA.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); + + Internal::AddSetAsVectors(matrixA, matrixB, output); + } + + template + void ScaleUpdate(ElementType scalar, MatrixReference matrix) + { + if (scalar == 0) + { + matrix.Reset(); + } + else if (scalar == 1) + { + return; + } + else if (matrix.IsContiguous()) + { + Internal::VectorOperations::ScaleUpdate(scalar, matrix.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrix.GetMinorSize(); ++i) + { + Internal::VectorOperations::ScaleUpdate(scalar, matrix.GetMajorVector(i)); + } + } + } + + // implementations of ScaleSet using the equivalent vector operation + namespace Internal + { + template + void ScaleSetAsVectors(ElementType scalar, ConstMatrixReference matrix, MatrixReference output) + { + if (matrix.IsContiguous() && output.IsContiguous()) + { + Internal::VectorOperations::ScaleSet(scalar, matrix.ReferenceAsVector(), output.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrix.GetMinorSize(); ++i) + { + Internal::VectorOperations::ScaleSet(scalar, matrix.GetMajorVector(i), output.GetMajorVector(i)); + } + } + } + + template + void ScaleSetAsVectors(ElementType scalar, ConstMatrixReference matrix, MatrixReference::value> output) + { + for (size_t i = 0; i < matrix.NumRows(); ++i) + { + Internal::VectorOperations::ScaleSet(scalar, matrix.GetRow(i), output.GetRow(i)); + } + } + } // namespace Internal + + template + void ScaleSet(ElementType scalar, ConstMatrixReference matrix, MatrixReference output) + { + DEBUG_CHECK_SIZES(matrix.NumRows() != output.NumRows() || matrix.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); + + if (scalar == 0) + { + output.Reset(); + } + else if (scalar == 1) + { + output.CopyFrom(matrix); + } + else + { + Internal::ScaleSetAsVectors(scalar, matrix, output); + } + } + + // implementations of ScaleAddUpdate using the equivalent vector operation + namespace Internal + { + template + void ScaleAddUpdateAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, MatrixReference matrixB) + { + if (matrixA.IsContiguous() && matrixB.IsContiguous()) + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, matrixA.ReferenceAsVector(), scalarB, matrixB.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrixA.GetMinorSize(); ++i) + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, matrixA.GetMajorVector(i), scalarB, matrixB.GetMajorVector(i)); + } + } + } + + template + void ScaleAddUpdateAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, MatrixReference::value> matrixB) + { + for (size_t i = 0; i < matrixA.NumRows(); ++i) + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, matrixA.GetRow(i), scalarB, matrixB.GetRow(i)); + } + } + } // namespace Internal + + // matrixB += scalarA * matrixA + template + void ScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, One, MatrixReference matrixB) + { + DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns(), "Incompatible matrix sizes."); + + if (scalarA == 0) + { + return; + } + else if (scalarA == 1) + { + AddUpdate(matrixA, matrixB); + } + else + { + Internal::ScaleAddUpdateAsVectors(scalarA, matrixA, One(), matrixB); + } + } + + // matrixB = scalarA * ones + scalarB * matrixB + template + void 
ScaleAddUpdate(ElementType scalarA, OnesMatrix, ElementType scalarB, MatrixReference matrixB) + { + if (scalarA == 0) + { + ScaleUpdate(scalarB, matrixB); + } + else if (scalarB == 0) + { + matrixB.Fill(scalarA); + } + else if (scalarB == 1) + { + AddUpdate(scalarA, matrixB); + } + else if (matrixB.IsContiguous()) + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, OnesVector(), scalarB, matrixB.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrixB.GetMinorSize(); ++i) + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, OnesVector(), scalarB, matrixB.GetMajorVector(i)); + } + } + } + + // matrixB = matrixA + scalarB * matrixB + template + void ScaleAddUpdate(One, ConstMatrixReference matrixA, ElementType scalarB, MatrixReference matrixB) + { + DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns(), "Incompatible matrix sizes."); + + if (scalarB == 0) + { + matrixB.CopyFrom(matrixA); + } + else if (scalarB == 1) + { + AddUpdate(matrixA, matrixB); + } + else + { + Internal::ScaleAddUpdateAsVectors(One(), matrixA, scalarB, matrixB); + } + } + + // matrixB = scalarA * matrixA + scalarB * matrixB + template + void ScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, ElementType scalarB, MatrixReference matrixB) + { + DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns(), "Incompatible matrix sizes."); + + if (scalarA == 0) + { + ScaleUpdate(scalarB, matrixB); + } + else if (scalarA == 1) + { + ScaleAddUpdate(One(), matrixA, scalarB, matrixB); + } + else if (scalarB == 0) + { + Internal::ScaleSetAsVectors(scalarA, matrixA, matrixB); + } + else if (scalarB == 1) + { + Internal::ScaleAddUpdateAsVectors(scalarA, matrixA, One(), matrixB); + } + else + { + Internal::ScaleAddUpdateAsVectors(scalarA, matrixA, scalarB, matrixB); + } + } + + // implementations of ScaleAddSet using the equivalent vector operation + namespace Internal + { + template + void ScaleAddSetAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, ConstMatrixReference matrixB, MatrixReference output) + { + if (matrixA.IsContiguous() && matrixB.IsContiguous() && output.IsContiguous()) + { + Internal::VectorOperations::ScaleAddSet(scalarA, matrixA.ReferenceAsVector(), scalarB, matrixB.ReferenceAsVector(), output.ReferenceAsVector()); + } + else + { + for (size_t i = 0; i < matrixA.GetMinorSize(); ++i) + { + Internal::VectorOperations::ScaleAddSet(scalarA, matrixA.GetMajorVector(i), scalarB, matrixB.GetMajorVector(i), output.GetMajorVector(i)); + } + } + } + + template + void ScaleAddSetAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, ConstMatrixReference matrixB, MatrixReference::value> output) + { + for (size_t i = 0; i < matrixA.NumRows(); ++i) + { + Internal::VectorOperations::ScaleAddSet(scalarA, matrixA.GetRow(i), scalarB, matrixB.GetRow(i), output.GetRow(i)); + } + } + + template + void ScaleAddSetAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, ConstMatrixReference::value> matrixB, MatrixReference output) + { + for (size_t i = 0; i < matrixA.NumRows(); ++i) + { + Internal::VectorOperations::ScaleAddSet(scalarA, matrixA.GetRow(i), scalarB, matrixB.GetRow(i), output.GetRow(i)); + } + } + } // namespace Internal + + // output = scalarA * matrixA + matrixB + template + void ScaleAddSet(ElementType scalarA, ConstMatrixReference matrixA, One, ConstMatrixReference matrixB, MatrixReference output) + { + 
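+ // One and OnesMatrix are empty tag types: passing One() in place of a scalar
+ // selects, at overload-resolution time, a kernel that skips that multiply
+ // entirely. Sketch (hypothetical sizes):
+ //
+ //     math::RowMatrix<double> A(4, 4), B(4, 4);
+ //     math::ScaleAddUpdate(2.0, A, math::One(), B);  // B += 2 * A; B itself never scaled
+ //     math::ScaleAddUpdate(math::One(), A, 0.5, B);  // B = A + 0.5 * B; A never scaled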
DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns() || matrixA.NumRows() != output.NumRows() || matrixA.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); + + if (scalarA == 0) + { + output.CopyFrom(matrixB); + } + else if (scalarA == 1) + { + Internal::AddSetAsVectors(matrixA, matrixB, output); + } + else + { + Internal::ScaleAddSetAsVectors(scalarA, matrixA, One(), matrixB, output); + } + } + + //// output = matrixA + scalarB * matrixB + template + void ScaleAddSet(One, ConstMatrixReference matrixA, ElementType scalarB, ConstMatrixReference matrixB, MatrixReference output) + { + DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns() || matrixA.NumRows() != output.NumRows() || matrixA.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); + + if (scalarB == 0) + { + output.CopyFrom(matrixA); + } + else if (scalarB == 1) + { + Internal::AddSetAsVectors(matrixA, matrixB, output); + } + else + { + Internal::ScaleAddSetAsVectors(One(), matrixA, scalarB, matrixB, output); + } + } + + // output = scalarA * matrixA + scalarB * matrixB + template + void ScaleAddSet(ElementType scalarA, ConstMatrixReference matrixA, ElementType scalarB, ConstMatrixReference matrixB, MatrixReference output) + { + DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns() || matrixA.NumRows() != output.NumRows() || matrixA.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); + + if (scalarA == 0) + { + ScaleSet(scalarB, matrixB, output); + } + else if (scalarA == 1) + { + ScaleAddSet(One(), matrixA, scalarB, matrixB, output); + } + else if (scalarB == 0) + { + Internal::ScaleSetAsVectors(scalarA, matrixA, output); + } + else if (scalarB == 1) + { + Internal::ScaleAddSetAsVectors(scalarA, matrixA, One(), matrixB, output); + } + else + { + Internal::ScaleAddSetAsVectors(scalarA, matrixA, scalarB, matrixB, output); + } + } + + template + void RowwiseSum(ConstMatrixReference matrix, ColumnVectorReference vector) + { + DEBUG_CHECK_SIZES(vector.Size() != matrix.NumRows(), "Incompatible matrix vector sizes."); + + math::ColumnVector ones(matrix.NumColumns()); + ones.Fill(1); + + MultiplyScaleAddUpdate(static_cast(1), matrix, ones, static_cast(0), vector); + } + + template + void ColumnwiseSum(ConstMatrixReference matrix, RowVectorReference vector) + { + DEBUG_CHECK_SIZES(vector.Size() != matrix.NumColumns(), "Incompatible matrix vector sizes."); + + math::RowVector ones(matrix.NumRows()); + ones.Fill(1); + + MultiplyScaleAddUpdate(static_cast(1), ones, matrix, static_cast(0), vector); + } + + template + void RankOneUpdate(ElementType scalar, ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) + { + DEBUG_CHECK_SIZES(vectorA.Size() != matrix.NumRows() || vectorB.Size() != matrix.NumColumns(), "Incompatible matrix vector sizes."); + Internal::MatrixOperations::RankOneUpdate(scalar, vectorA, vectorB, matrix); + } + + template + void MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrix, ConstColumnVectorReference vectorA, ElementType scalarB, ColumnVectorReference vectorB) + { + DEBUG_CHECK_SIZES(matrix.NumColumns() != vectorA.Size() || matrix.NumRows() != vectorB.Size(), "Incompatible matrix vector sizes."); + + Internal::MatrixOperations::MultiplyScaleAddUpdate(scalarA, matrix, vectorA, scalarB, vectorB); + } + + template + void MultiplyScaleAddUpdate(ElementType scalarA, 
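+ // RowwiseSum/ColumnwiseSum above are implemented as a GEMV against a ones
+ // vector: sums = 1 * M * ones + 0 * sums. Worked example (hypothetical data):
+ //
+ //     math::RowMatrix<double> M{ { 1, 2, 3 }, { 4, 5, 6 } };
+ //     math::ColumnVector<double> sums(2);
+ //     math::RowwiseSum(M, sums);      // sums == { 6, 15 }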
ConstRowVectorReference vectorA, ConstMatrixReference matrix, ElementType scalarB, RowVectorReference vectorB) + { + DEBUG_CHECK_SIZES(matrix.NumRows() != vectorA.Size() || matrix.NumColumns() != vectorB.Size(), "Incompatible matrix vector sizes."); + + Internal::MatrixOperations::MultiplyScaleAddUpdate(scalarA, vectorA, matrix, scalarB, vectorB); + } + + template + void MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, ConstMatrixReference matrixB, ElementType scalarC, MatrixReference matrixC) + { + DEBUG_CHECK_SIZES(matrixA.NumColumns() != matrixB.NumRows() || matrixA.NumRows() != matrixC.NumRows() || matrixB.NumColumns() != matrixC.NumColumns(), "Incompatible matrix sizes."); + + Internal::MatrixOperations::MultiplyScaleAddUpdate(scalarA, matrixA, matrixB, scalarC, matrixC); + } + + template + void ElementwiseMultiplySet(ConstMatrixReference matrixA, ConstMatrixReference matrixB, MatrixReference matrixC) + { + for (size_t i = 0; i < matrixA.NumRows(); ++i) + { + ElementwiseMultiplySet(matrixA.GetRow(i), matrixB.GetRow(i), matrixC.GetRow(i)); + } + } + + template + void RowwiseCumulativeSumUpdate(MatrixReference matrix) + { + for (size_t i = 0; i < matrix.NumRows(); ++i) + { + CumulativeSumUpdate(matrix.GetRow(i)); + } + } + + template + void ColumnwiseCumulativeSumUpdate(MatrixReference matrix) + { + for (size_t i = 0; i < matrix.NumColumns(); ++i) + { + CumulativeSumUpdate(matrix.GetColumn(i)); + } + } + + template + void RowwiseConsecutiveDifferenceUpdate(MatrixReference matrix) + { + for (size_t i = 0; i < matrix.NumRows(); ++i) + { + ConsecutiveDifferenceUpdate(matrix.GetRow(i)); + } + } + + template + void ColumnwiseConsecutiveDifferenceUpdate(MatrixReference matrix) + { + for (size_t i = 0; i < matrix.NumColumns(); ++i) + { + ConsecutiveDifferenceUpdate(matrix.GetColumn(i)); + } + } + + // + // Native implementations of operations + // + + namespace Internal + { + template + void MatrixOperations::RankOneUpdate(ElementType scalar, ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) + { + for (size_t i = 0; i < matrix.NumRows(); ++i) + { + for (size_t j = 0; j < matrix.NumColumns(); ++j) + { + matrix(i, j) += scalar * vectorA[i] * vectorB[j]; + } + } + } + + template + void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrix, ConstColumnVectorReference vectorA, ElementType scalarB, ColumnVectorReference vectorB) + { + for (size_t i = 0; i < matrix.NumRows(); ++i) + { + auto row = matrix.GetRow(i); + vectorB[i] = scalarA * Dot(row, vectorA) + scalarB * vectorB[i]; + } + } + + template + void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstRowVectorReference vectorA, ConstMatrixReference matrix, ElementType scalarB, RowVectorReference vectorB) + { + MultiplyScaleAddUpdate(scalarA, matrix.Transpose(), vectorA.Transpose(), scalarB, vectorB.Transpose()); + } + + template + void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, ConstMatrixReference matrixB, ElementType scalarB, MatrixReference matrixC) + { + for (size_t i = 0; i < matrixA.NumRows(); ++i) + { + for (size_t j = 0; j < matrixB.NumColumns(); ++j) + { + auto row = matrixA.GetRow(i); + auto column = matrixB.GetColumn(j); + matrixC(i, j) = scalarA * Dot(row, column) + scalarB * matrixC(i, j); + } + } + } + +#if defined(USE_BLAS) + // + // OpenBLAS implementations of operations + // + + template + void MatrixOperations::RankOneUpdate(ElementType scalar, 
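+ // The native fallback above forms each output element as a dot product:
+ //
+ //     C(i, j) = scalarA * Dot(A.GetRow(i), B.GetColumn(j)) + scalarB * C(i, j)
+ //
+ // which is O(rows * columns * inner) with no blocking or vectorization; the
+ // USE_BLAS branch below hands the identical contract to Gemm instead.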
ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) + { + Blas::Ger(matrix.GetLayout(), static_cast(matrix.NumRows()), static_cast(matrix.NumColumns()), scalar, vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetConstDataPointer(), static_cast(vectorB.GetIncrement()), matrix.GetDataPointer(), static_cast(matrix.GetIncrement())); + } + + template + void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrix, ConstColumnVectorReference vectorA, ElementType scalarB, ColumnVectorReference vectorB) + { + Blas::Gemv(matrix.GetLayout(), MatrixTranspose::noTranspose, static_cast(matrix.NumRows()), static_cast(matrix.NumColumns()), scalarA, matrix.GetConstDataPointer(), static_cast(matrix.GetIncrement()), vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), scalarB, vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); + } + + template + void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstRowVectorReference vectorA, ConstMatrixReference matrix, ElementType scalarB, RowVectorReference vectorB) + { + MultiplyScaleAddUpdate(scalarA, matrix.Transpose(), vectorA.Transpose(), scalarB, vectorB.Transpose()); + } + + template + void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, ConstMatrixReference matrixB, ElementType scalarB, MatrixReference matrixC) + { + MatrixLayout order = matrixC.GetLayout(); + MatrixTranspose transposeA = matrixA.GetLayout() == order ? MatrixTranspose::noTranspose : MatrixTranspose::transpose; + MatrixTranspose transposeB = matrixB.GetLayout() == order ? MatrixTranspose::noTranspose : MatrixTranspose::transpose; + + Blas::Gemm(order, transposeA, transposeB, static_cast(matrixA.NumRows()), static_cast(matrixB.NumColumns()), static_cast(matrixA.NumColumns()), scalarA, matrixA.GetConstDataPointer(), static_cast(matrixA.GetIncrement()), matrixB.GetConstDataPointer(), static_cast(matrixB.GetIncrement()), scalarB, matrixC.GetDataPointer(), static_cast(matrixC.GetIncrement())); + } +#endif + } // namespace Internal +} // namespace math +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/math/include/Tensor.h b/libraries/math/include/Tensor.h index 6f983749f..dbf6fbcdd 100644 --- a/libraries/math/include/Tensor.h +++ b/libraries/math/include/Tensor.h @@ -960,4 +960,806 @@ namespace math using ConstColumnRowChannelTensorReference = ConstTensorReference; } // namespace math } // namespace ell -#include "../tcc/Tensor.tcc" + +#pragma region implementation + +#include + +namespace ell +{ +namespace math +{ + // + // TensorMatrixSlicers + // + + template + struct TensorMatrixSlicer; + + template + struct TensorMatrixSlicer + { + using SliceType = ColumnMatrixReference; + using ConstSliceType = ConstColumnMatrixReference; + + inline static size_t NumSlices(TensorShape shape) + { + return shape.GetValue(); + } + + static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) + { + DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); + + return ConstSliceType(pData + index * increment2, shape.GetValue(), shape.GetValue(), increment1); + } + + static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) + { + DEBUG_THROW(index >= NumSlices(shape), 
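+ // The Gemm wrapper above never copies to fix a layout mismatch: any operand
+ // whose layout differs from the output's is flagged as transposed instead.
+ // Sketch (hypothetical shapes):
+ //
+ //     math::ColumnMatrix<double> A(2, 3);
+ //     math::RowMatrix<double> B(3, 4), C(2, 4);
+ //     math::MultiplyScaleAddUpdate(1.0, A, B, 0.0, C);  // A is passed to BLAS with the transpose flag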
utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); + + return SliceType(pData + index * increment2, shape.GetValue(), shape.GetValue(), increment1); + } + }; + + template + struct TensorMatrixSlicer + { + using SliceType = ColumnMatrixReference; + using ConstSliceType = ConstColumnMatrixReference; + + inline static size_t NumSlices(TensorShape shape) + { + return shape.GetValue(); + } + + static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) + { + DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); + + return ConstSliceType(pData + index * increment1, shape.GetValue(), shape.GetValue(), increment2); + } + + static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) + { + DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); + + return SliceType(pData + index * increment1, shape.GetValue(), shape.GetValue(), increment2); + } + }; + + template + struct TensorMatrixSlicer + { + using SliceType = RowMatrixReference; + using ConstSliceType = ConstRowMatrixReference; + + inline static size_t NumSlices(TensorShape shape) + { + return shape.GetValue(); + } + + static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) + { + DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); + + return ConstSliceType(pData + index * increment2, shape.GetValue(), shape.GetValue(), increment1); + } + + static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) + { + DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); + + return SliceType(pData + index * increment2, shape.GetValue(), shape.GetValue(), increment1); + } + }; + + template + struct TensorMatrixSlicer + { + using SliceType = RowMatrixReference; + using ConstSliceType = ConstRowMatrixReference; + + inline static size_t NumSlices(TensorShape shape) + { + return shape.GetValue(); + } + + static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) + { + DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); + + return ConstSliceType(pData + index * increment1, shape.GetValue(), shape.GetValue(), increment2); + } + + static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) + { + DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); + + return SliceType(pData + index * increment1, shape.GetValue(), shape.GetValue(), increment2); + } + }; + + // + // TensorVectorSlicers + // + + template + struct TensorVectorSlicer; + + template + struct TensorVectorSlicer + { + using SliceType = ColumnVectorReference; + using ConstSliceType = ConstColumnVectorReference; + + static inline size_t NumSlices(TensorShape shape) + { + 
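+ // A matrix slicer has one free index (the remaining dimension), so NumSlices
+ // is that dimension's extent; a vector slicer fixes two indices, so NumSlices
+ // is the product of the two fixed extents. Sketch, assuming a tensor of
+ // shape 5 x 7 x 3 (rows x columns x channels):
+ //
+ //     math::ChannelColumnRowTensor<double> T(5, 7, 3);
+ //     auto slice = T.GetSlice<math::Dimension::column, math::Dimension::channel>(0);
+ //     // slice views one row as a 7 x 3 matrix; there are 5 such slices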
return shape.GetValue() * shape.GetValue(); + } + + static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) + { + constexpr bool shouldSwap = dimension1 > dimension2; + if /*constexpr*/ (shouldSwap) + { + std::swap(index1, index2); + } + + DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); + DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); + + return ConstSliceType(pData + index1 * increment1 + index2 * increment2, shape.GetValue(), 1); + } + + static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) + { + constexpr bool shouldSwap = dimension1 > dimension2; + if /*constexpr*/ (shouldSwap) + { + std::swap(index1, index2); + } + + DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); + DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); + + return SliceType(pData + index1 * increment1 + index2 * increment2, shape.GetValue(), 1); + } + }; + + template + struct TensorVectorSlicer + { + using SliceType = ColumnVectorReference; + using ConstSliceType = ConstColumnVectorReference; + + static inline size_t NumSlices(TensorShape shape) + { + return shape.GetValue() * shape.GetValue(); + } + + static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) + { + constexpr bool shouldSwap = dimension0 > dimension2; + if /*constexpr*/ (shouldSwap) + { + std::swap(index1, index2); + } + + DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); + DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); + + return ConstSliceType(pData + index1 + index2 * increment2, shape.GetValue(), increment1); + } + + static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) + { + constexpr bool shouldSwap = dimension0 > dimension2; + if /*constexpr*/ (shouldSwap) + { + std::swap(index1, index2); + } + + DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); + DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); + + return SliceType(pData + index1 + index2 * increment2, shape.GetValue(), increment1); + } + }; + + template + struct TensorVectorSlicer + { + using SliceType = ColumnVectorReference; + using ConstSliceType = ConstColumnVectorReference; + + static inline size_t NumSlices(TensorShape shape) + { + return shape.GetValue() * shape.GetValue(); + } + + static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) + { + constexpr bool shouldSwap = dimension0 > dimension1; + if /*constexpr*/ (shouldSwap) + { 
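+ // index1/index2 arrive in the caller's declared dimension order; the swap
+ // normalizes them to memory order so the pointer arithmetic below works for
+ // either ordering. The /*constexpr*/ marks where C++17 would hoist the test
+ // out of runtime entirely:
+ //
+ //     if constexpr (dimension0 > dimension1) { std::swap(index1, index2); }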
+ std::swap(index1, index2); + } + + DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); + DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); + + return ConstSliceType(pData + index1 + index2 * increment1, shape.GetValue(), increment2); + } + + static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) + { + constexpr bool shouldSwap = dimension0 > dimension1; + if /*constexpr*/ (shouldSwap) + { + std::swap(index1, index2); + } + + DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); + DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); + + return SliceType(pData + index1 + index2 * increment1, shape.GetValue(), increment2); + } + }; + + // + // ConstTensorReference + // + + template + ConstTensorReference::ConstTensorReference(TensorShape shape) : + ConstTensorReference(nullptr, shape) + { + } + + template + ConstTensorReference::ConstTensorReference(const ElementType* pData, TensorShape shape) : + _pData(pData), + _shape(shape) + { + _increment1 = shape.GetValue(); + _increment2 = _increment1 * shape.GetValue(); + } + + template + template + size_t ConstTensorReference::GetSize() const + { + constexpr auto dimension = std::get(std::make_tuple(dimension0, dimension1, dimension2)); + return _shape.GetValue(); + } + + template + template + size_t ConstTensorReference::NumSlices() const + { + return TensorMatrixSlicer::NumSlices(_shape); + } + + template + template + size_t ConstTensorReference::NumSlices() const + { + return TensorVectorSlicer::NumSlices(_shape); + } + + template + size_t ConstTensorReference::NumPrimarySlices() const + { + return GetSize2(); + } + + template + ElementType ConstTensorReference::operator()(size_t row, size_t column, size_t channel) const + { + return operator()({ row, column, channel }); + } + + template + ElementType ConstTensorReference::operator()(TensorCoordinate coordinate) const + { + return GetConstDataPointer()[this->GetOffset(coordinate)]; + } + + template + std::vector ConstTensorReference::ToArray() const + { + if (!IsContiguous()) + { + auto resultIncrement0 = _shape.GetValue(); + auto resultIncrement1 = resultIncrement0 * _shape.GetValue(); + + std::vector result(NumRows() * NumColumns() * NumChannels()); + for (size_t i = 0; i < NumRows(); ++i) + { + for (size_t j = 0; j < NumColumns(); ++j) + { + for (size_t k = 0; k < NumChannels(); ++k) + { + auto value = (*this)(i, j, k); + auto coordinate = TensorCoordinate(i, j, k); + auto resultIndex = coordinate.GetValue() + coordinate.GetValue() * resultIncrement0 + coordinate.GetValue() * resultIncrement1; + result[resultIndex] = value; + } + } + } + return result; + } + return { GetConstDataPointer(), GetConstDataPointer() + Size() }; + } + + template + void ConstTensorReference::Swap(ConstTensorReference& other) + { + std::swap(_pData, other._pData); + std::swap(_shape, other._shape); + std::swap(_increment1, other._increment1); + std::swap(_increment2, other._increment2); + } + + template + bool ConstTensorReference::IsContiguous() const + { + return GetSize0() == GetIncrement1() && GetSize0() * GetSize1() == 
GetIncrement2(); + } + + template + template + bool ConstTensorReference::IsEqual(ConstTensorReference other, ElementType tolerance) const + { + if (NumRows() != other.NumRows() || NumColumns() != other.NumColumns() || NumChannels() != other.NumChannels()) + { + return false; + } + + for (size_t i = 0; i < NumRows(); ++i) + { + for (size_t j = 0; j < NumColumns(); ++j) + { + for (size_t k = 0; k < NumChannels(); ++k) + { + auto diff = (*this)(i, j, k) - other(i, j, k); + if (diff > tolerance || -diff > tolerance) + { + return false; + } + } + } + } + return true; + } + + template + template + bool ConstTensorReference::operator==(const ConstTensorReference& other) const + { + return IsEqual(other); + } + + template + template + bool ConstTensorReference::operator!=(const ConstTensorReference& other) const + { + return !IsEqual(other); + } + + template + ConstTensorReference ConstTensorReference::GetSubTensor(size_t firstRow, size_t firstColumn, size_t firstChannel, size_t numRows, size_t numColumns, size_t numChannels) const + { + return GetSubTensor({ firstRow, firstColumn, firstChannel }, { numRows, numColumns, numChannels }); + } + + template + ConstTensorReference ConstTensorReference::GetSubTensor(TensorCoordinate firstCoordinate, TensorShape shape) const + { + DEBUG_THROW(firstCoordinate.GetRowIndex() + shape.NumRows() > NumRows() || firstCoordinate.GetColumnIndex() + shape.NumColumns() > NumColumns() || firstCoordinate.GetChannelIndex() + shape.NumChannels() > NumChannels(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "subtensor exceeds tensor dimensions.")); + + return ConstTensorReference(GetConstDataPointer() + GetOffset(firstCoordinate), shape, GetIncrement1(), GetIncrement2()); + } + + template + template + auto ConstTensorReference::GetSlice(size_t index) const -> typename TensorMatrixSlicer::ConstSliceType + { + return TensorMatrixSlicer::GetConstSlice(GetConstDataPointer(), GetShape(), GetIncrement1(), GetIncrement2(), index); + } + + template + template + auto ConstTensorReference::GetSlice(size_t index1, size_t index2) const -> typename TensorVectorSlicer::ConstSliceType + { + return TensorVectorSlicer::GetConstSlice(GetConstDataPointer(), GetShape(), GetIncrement1(), GetIncrement2(), index1, index2); + } + + template + auto ConstTensorReference::GetPrimarySlice(size_t index) const -> typename TensorMatrixSlicer::ConstSliceType + { + return TensorMatrixSlicer::GetConstSlice(GetConstDataPointer(), GetShape(), GetIncrement1(), GetIncrement2(), index); + } + + template + ConstRowVectorReference ConstTensorReference::ReferenceAsVector() const + { + DEBUG_THROW(GetSize0() != GetIncrement1() || GetSize0() * GetSize1() != GetIncrement2(), utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Can only flatten a tensor when all the dimensions are full")); + + return ConstRowVectorReference(GetConstDataPointer(), Size(), 1); + } + + template + ConstRowMatrixReference ConstTensorReference::ReferenceAsMatrix() const + { + DEBUG_THROW(GetSize0() != GetIncrement1(), utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Can only flatten a tensor when the first dimension is full")); + + return ConstRowMatrixReference(GetConstDataPointer(), GetSize2(), GetSize0() * GetSize1(), GetIncrement2()); + } + + template + size_t ConstTensorReference::GetOffset(TensorCoordinate coordinate) const + { + DEBUG_THROW(coordinate.GetRowIndex() >= NumRows() || coordinate.GetColumnIndex() >= NumColumns() || 
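+ // Flattening rules above, sketched on a hypothetical 4 x 3 x 2 tensor:
+ //
+ //     math::ChannelColumnRowTensor<double> T(4, 3, 2);
+ //     auto v = T.ReferenceAsVector();   // ok: fully packed, v.Size() == 24
+ //     auto M = T.ReferenceAsMatrix();   // 4 x 6 row-major view, one row per primary slice
+ //     auto S = T.GetSubTensor(0, 0, 0, 2, 2, 2);
+ //     // S.ReferenceAsVector() would throw in debug builds: S is not contiguous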
coordinate.GetChannelIndex() >= NumChannels(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, std::string("index exceeds tensor size in ConstTensorReference::GetOffset().") + " Tensor size: (" + std::to_string(NumRows()) + " x " + std::to_string(NumColumns()) + " x " + std::to_string(NumChannels()) + "), " + " index: (" + + std::to_string(coordinate.GetRowIndex()) + ", " + std::to_string(coordinate.GetColumnIndex()) + ", " + std::to_string(coordinate.GetChannelIndex()) + ")")); + + return coordinate.GetValue() + coordinate.GetValue() * GetIncrement1() + coordinate.GetValue() * GetIncrement2(); + } + + template + ConstTensorReference::ConstTensorReference(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2) : + _pData(pData), + _shape(shape), + _increment1(increment1), + _increment2(increment2) + {} + + template + size_t NumSlices(ConstTensorReference tensor) + { + return tensor.template NumSlices(); + } + + template + size_t NumSlices(ConstTensorReference tensor) + { + return tensor.template NumSlices(); + } + + template + auto GetSlice(ConstTensorReference tensor, size_t index) + { + return tensor.template GetSlice(index); + } + + template + auto GetSlice(ConstTensorReference tensor, size_t index1, size_t index2) + { + return tensor.template GetSlice(index1, index2); + } + + // + // TensorReference + // + + template + TensorReference::TensorReference(TensorShape shape) : + ConstTensorRef(shape) + {} + + template + TensorReference::TensorReference(ElementType* pData, size_t numRows, size_t numColumns, size_t numChannels) : + ConstTensorRef(pData, TensorShape{ numRows, numColumns, numChannels }) + {} + + template + ElementType& TensorReference::operator()(size_t row, size_t column, size_t channel) + { + return operator()({ row, column, channel }); + } + + template + ElementType& TensorReference::operator()(TensorCoordinate coordinate) + { + return GetDataPointer()[this->GetOffset(coordinate)]; + } + + template + void TensorReference::Swap(TensorReference& other) + { + ConstTensorRef::Swap(other); + } + + template + void TensorReference::CopyFrom(ConstTensorReference other) + { + DEBUG_CHECK_SIZES(this->NumRows() != other.NumRows(), "Tensors must have the same number of rows"); + DEBUG_CHECK_SIZES(this->NumColumns() != other.NumColumns(), "Tensors must have the same number of columns"); + DEBUG_CHECK_SIZES(this->NumChannels() != other.NumChannels(), "Tensors must have the same number of channels"); + + for (size_t i = 0; i < this->NumPrimarySlices(); ++i) + { + auto slice = other.GetPrimarySlice(i); + GetPrimarySlice(i).CopyFrom(slice); + } + } + + template + void TensorReference::CopyFrom(ConstTensorReference other) + { + DEBUG_CHECK_SIZES(this->NumRows() != other.NumRows(), "Tensors must have the same number of rows"); + DEBUG_CHECK_SIZES(this->NumColumns() != other.NumColumns(), "Tensors must have the same number of columns"); + DEBUG_CHECK_SIZES(this->NumChannels() != other.NumChannels(), "Tensors must have the same number of channels"); + + for (size_t i = 0; i < NumSlices(*this); ++i) + { + this->template GetSlice(i).CopyFrom(GetSlice(other, i)); + } + } + + template + template + void TensorReference::CopyFrom(ConstTensorReference other) + { + DEBUG_CHECK_SIZES(this->NumRows() != other.NumRows(), "Tensors must have the same number of rows"); + DEBUG_CHECK_SIZES(this->NumColumns() != other.NumColumns(), "Tensors must have the same number of columns"); + DEBUG_CHECK_SIZES(this->NumChannels() != other.NumChannels(), "Tensors must 
have the same number of channels"); + + for (size_t i = 0; i < math::NumSlices(*this); ++i) + { + auto thisSlice = this->template GetSlice(i); + auto otherSlice = other.template GetSlice(i); + thisSlice.CopyFrom(otherSlice); + } + } + + template + void TensorReference::Fill(ElementType value) + { + for (size_t i = 0; i < this->NumPrimarySlices(); ++i) + { + auto slice = GetPrimarySlice(i); + slice.Fill(value); + } + } + + template + template + void TensorReference::Generate(GeneratorType generator) + { + for (size_t i = 0; i < this->NumPrimarySlices(); ++i) + { + auto slice = GetPrimarySlice(i); + slice.Generate(generator); + } + } + + template + template + void TensorReference::Transform(TransformationType transformation) + { + for (size_t i = 0; i < this->NumPrimarySlices(); ++i) + { + auto slice = GetPrimarySlice(i); + slice.Transform(transformation); + } + } + + template + TensorReference TensorReference::GetSubTensor(size_t firstRow, size_t firstColumn, size_t firstChannel, size_t numRows, size_t numColumns, size_t numChannels) + { + return GetSubTensor({ firstRow, firstColumn, firstChannel }, { numRows, numColumns, numChannels }); + } + + template + TensorReference TensorReference::GetSubTensor(TensorCoordinate firstCoordinate, TensorShape shape) + { + DEBUG_THROW(firstCoordinate.GetRowIndex() + shape.NumRows() > this->NumRows() || firstCoordinate.GetColumnIndex() + shape.NumColumns() > this->NumColumns() || firstCoordinate.GetChannelIndex() + shape.NumChannels() > this->NumChannels(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "subtensor exceeds tensor dimensions.")); + + return TensorReference(GetDataPointer() + this->GetOffset(firstCoordinate), shape, this->GetIncrement1(), this->GetIncrement2()); + } + + template + template + auto TensorReference::GetSlice(size_t index) -> typename TensorMatrixSlicer::SliceType + { + return TensorMatrixSlicer::GetSlice(GetDataPointer(), this->GetShape(), this->GetIncrement1(), this->GetIncrement2(), index); + } + + template + template + auto TensorReference::GetSlice(size_t index1, size_t index2) -> typename TensorVectorSlicer::SliceType + { + return TensorVectorSlicer::GetSlice(GetDataPointer(), this->GetShape(), this->GetIncrement1(), this->GetIncrement2(), index1, index2); + } + + template + auto TensorReference::GetPrimarySlice(size_t index) -> typename TensorMatrixSlicer::SliceType + { + return TensorMatrixSlicer::GetSlice(GetDataPointer(), this->GetShape(), this->GetIncrement1(), this->GetIncrement2(), index); + } + + template + RowVectorReference TensorReference::ReferenceAsVector() + { + DEBUG_THROW(this->GetSize0() != this->GetIncrement1() || this->GetSize0() * this->GetSize1() != this->GetIncrement2(), utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Can only flatten a tensor to vector when all the dimensions are full")); + + return RowVectorReference(GetDataPointer(), this->Size(), 1); + } + + template + RowMatrixReference TensorReference::ReferenceAsMatrix() + { + DEBUG_THROW(this->GetSize0() != this->GetIncrement1(), utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Can only flatten a tensor when the first dimension is full")); + + return RowMatrixReference(GetDataPointer(), this->GetSize2(), this->GetSize0() * this->GetSize1(), this->GetIncrement2()); + } + + template + TensorReference::TensorReference(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2) : + ConstTensorRef(pData, shape, increment1, increment2) + {} + + template 
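+ // Fill, Generate, and Transform above all lower to one matrix operation per
+ // primary slice, so they work on strided sub-tensors as well as owned
+ // tensors. Sketch (hypothetical values):
+ //
+ //     math::ChannelColumnRowTensor<double> T(2, 2, 2);
+ //     T.Fill(1.0);
+ //     T.Transform([](double x) { return 2.0 * x; });   // every element is now 2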
+ size_t NumSlices(TensorReference tensor) + { + return tensor.template NumSlices(); + } + + template + size_t NumSlices(TensorReference tensor) + { + return tensor.template NumSlices(); + } + + template + auto GetSlice(TensorReference tensor, size_t index) + { + return tensor.template GetSlice(index); + } + + template + auto GetSlice(TensorReference tensor, size_t index1, size_t index2) + { + return tensor.template GetSlice(index1, index2); + } + + // + // Tensor + // + + template + Tensor::Tensor() : + Tensor(TensorShape{ 0, 0, 0 }) + { + } + + template + Tensor::Tensor(size_t numRows, size_t numColumns, size_t numChannels) : + TensorRef(TensorShape(numRows, numColumns, numChannels)), + _data(numRows * numColumns * numChannels) + { + this->_pData = _data.data(); + } + + template + Tensor::Tensor(size_t numRows, size_t numColumns, size_t numChannels, const std::vector& data) : + TensorRef(TensorShape{ numRows, numColumns, numChannels }), + _data(data) + { + this->_pData = _data.data(); + } + + template + Tensor::Tensor(size_t numRows, size_t numColumns, size_t numChannels, std::vector&& data) : + TensorRef(TensorShape{ numRows, numColumns, numChannels }), + _data(std::move(data)) + { + this->_pData = _data.data(); + } + + template + Tensor::Tensor(TensorShape shape) : + TensorRef(shape), + _data(shape.Size()) + { + this->_pData = _data.data(); + } + + template + Tensor::Tensor(const Tensor& other) : + TensorRef(other), + _data(other._data) + { + this->_pData = _data.data(); + } + + template + template + Tensor::Tensor(ConstTensorReference other) : + TensorRef(TensorShape{ other.NumRows(), other.NumColumns(), other.NumChannels() }), + _data(other.Size()) + { + this->_pData = _data.data(); + this->CopyFrom(other); + } + + template + Tensor::Tensor(std::initializer_list>> list) : + TensorRef(TensorShape{ list.size(), list.begin()->size(), list.begin()->begin()->size() }), + _data(list.size() * list.begin()->size() * list.begin()->begin()->size()) + { + this->_pData = _data.data(); + auto numColumns = list.begin()->size(); + auto numChannels = list.begin()->begin()->size(); + DEBUG_USED(numColumns, numChannels); + + size_t i = 0; + for (auto rowIter = list.begin(); rowIter < list.end(); ++rowIter) + { + DEBUG_CHECK_SIZES(rowIter->size() != numColumns, "incorrect number of elements in initializer list"); + + size_t j = 0; + for (auto columnIter = rowIter->begin(); columnIter < rowIter->end(); ++columnIter) + { + DEBUG_CHECK_SIZES(columnIter->size() != numChannels, "incorrect number of elements in initializer list"); + + size_t k = 0; + for (auto channelIter = columnIter->begin(); channelIter < columnIter->end(); ++channelIter) + { + (*this)(i, j, k) = *channelIter; + ++k; + } + ++j; + } + ++i; + } + } + + template + Tensor& Tensor::operator=(Tensor other) + { + Swap(other); + return *this; + } + + template + void Tensor::Swap(Tensor& other) + { + TensorRef::Swap(other); + std::swap(_data, other._data); + } + + template + void TensorArchiver::Write(const Tensor& tensor, const std::string& name, utilities::Archiver& archiver) + { + archiver[GetRowsName(name)] << tensor.NumRows(); + archiver[GetColumnsName(name)] << tensor.NumColumns(); + archiver[GetChannelsName(name)] << tensor.NumChannels(); + archiver[GetValuesName(name)] << tensor.ToArray(); + } + + template + void TensorArchiver::Read(Tensor& tensor, const std::string& name, utilities::Unarchiver& archiver) + { + size_t rows = 0; + size_t columns = 0; + size_t channels = 0; + std::vector values; + + archiver[GetRowsName(name)] >> rows; + 
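+ // Write archives the three extents plus the canonical row/column/channel
+ // array that ToArray() produces, so Read can rebuild the tensor without
+ // knowing the source memory layout. Round-trip sketch (JsonArchiver is one
+ // of the utilities::Archiver implementations; names here are hypothetical):
+ //
+ //     utilities::JsonArchiver out(stream);
+ //     math::TensorArchiver::Write(T, "weights", out);
+ //     utilities::JsonUnarchiver in(stream, context);
+ //     math::TensorArchiver::Read(T2, "weights", in);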
archiver[GetColumnsName(name)] >> columns; + archiver[GetChannelsName(name)] >> channels; + archiver[GetValuesName(name)] >> values; + + Tensor value(rows, columns, channels, std::move(values)); + + tensor = std::move(value); + } + +} // namespace math +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/math/include/TensorOperations.h b/libraries/math/include/TensorOperations.h index 26727fb60..27f890f04 100644 --- a/libraries/math/include/TensorOperations.h +++ b/libraries/math/include/TensorOperations.h @@ -230,4 +230,220 @@ namespace math } // namespace math } // namespace ell -#include "../tcc/TensorOperations.tcc" +#pragma region implementation + +#include "../include/MatrixOperations.h" + +#include + +namespace ell +{ +namespace math +{ + template + void Print(ConstTensorReference tensor, std::ostream& stream, size_t row, size_t column) + { + stream << "{" << tensor(row, column, 0); + for (size_t k = 1; k < tensor.NumChannels(); ++k) + { + stream << ", " << tensor(row, column, k); + } + stream << "}"; + } + + template + void Print(ConstTensorReference tensor, std::ostream& stream, size_t row) + { + stream << "{ "; + Print(tensor, stream, row, 0); + for (size_t j = 1; j < tensor.NumColumns(); ++j) + { + stream << ", "; + Print(tensor, stream, row, j); + } + stream << " }"; + } + + template + void Print(ConstTensorReference tensor, std::ostream& stream) + { + using namespace logging; + + stream << "{ "; + Print(tensor, stream, 0); + for (size_t i = 1; i < tensor.NumRows(); ++i) + { + stream << "," << EOL << " "; + Print(tensor, stream, i); + } + stream << " }" << EOL; + } + + template + std::ostream& operator<<(std::ostream& stream, ConstTensorReference tensor) + { + Print(tensor, stream); + return stream; + } + + template > + void operator+=(TensorReference tensor, ScalarType scalar) + { + AddUpdate(static_cast(scalar), tensor); + } + + template > + void operator-=(TensorReference tensor, ScalarType scalar) + { + AddUpdate(-static_cast(scalar), tensor); + } + + template > + void operator*=(TensorReference tensor, ScalarType scalar) + { + ScaleUpdate(static_cast(scalar), tensor); + } + + template > + void operator/=(TensorReference tensor, ScalarType scalar) + { + DEBUG_THROW(scalar == 0, utilities::NumericException(utilities::NumericExceptionErrors::divideByZero, "Divide by zero.")); + + ScaleUpdate(static_cast(1.0 / scalar), tensor); + } + + template + void ScaleUpdate(ElementType scalar, TensorReference tensor) + { + for (size_t i = 0; i < tensor.NumPrimarySlices(); ++i) + { + ScaleUpdate(scalar, tensor.GetPrimarySlice(i)); + } + } + + template + void ScaleUpdate(UnorientedConstVectorBase vector, TensorReference tensor) + { + for (size_t i = 0; i < vector.Size(); ++i) + { + math::ScaleUpdate(vector[i], tensor.template GetSlice(i)); + } + } + + template + void ScaleUpdate(UnorientedConstVectorBase vector, TensorReference tensor) + { + for (size_t i = 0; i < vector.Size(); ++i) + { + math::ScaleUpdate(vector[i], tensor.template GetSlice(i)); + } + } + + template + void ScaleUpdate(UnorientedConstVectorBase vector, TensorReference tensor) + { + for (size_t i = 0; i < tensor.GetSize2(); ++i) + { + auto M = tensor.GetPrimarySlice(i); + for (size_t j = 0; j < tensor.GetSize0(); ++j) + { + auto u = M.GetRow(j); + math::ScaleUpdate(vector[j], u); + } + } + } + + template + void AddUpdate(ElementType scalar, TensorReference tensor) + { + for (size_t i = 0; i < tensor.NumPrimarySlices(); ++i) + { + AddUpdate(scalar, tensor.GetPrimarySlice(i)); + } + } + + template + 
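+ // The scalar operators above give tensors the same arithmetic sugar as
+ // vectors and matrices, forwarding to AddUpdate/ScaleUpdate one primary
+ // slice at a time (values hypothetical):
+ //
+ //     math::ChannelColumnRowTensor<double> T(2, 2, 2);
+ //     T += 3.0;    // AddUpdate(3.0, T)
+ //     T *= 0.5;    // ScaleUpdate(0.5, T)
+ //     T /= 2.0;    // debug builds reject a zero divisor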
void AddUpdate(UnorientedConstVectorBase vector, TensorReference tensor) + { + DEBUG_CHECK_SIZES(vector.Size() != tensor.GetSize2(), "vector and tensor dimensions must be the same"); + + for (size_t i = 0; i < vector.Size(); ++i) + { + AddUpdate(vector[i], tensor.template GetSlice(i)); + } + } + + template + void AddUpdate(UnorientedConstVectorBase vector, TensorReference tensor) + { + DEBUG_CHECK_SIZES(vector.Size() != tensor.GetSize1(), "vector and tensor dimensions must be the same"); + for (size_t i = 0; i < vector.Size(); ++i) + { + AddUpdate(vector[i], tensor.template GetSlice(i)); + } + } + + template + void AddUpdate(ConstRowVectorReference vector, TensorReference tensor) + { + AddUpdate(vector.Transpose(), tensor); + } + + template + void AddUpdate(ConstColumnVectorReference vector, TensorReference tensor) + { + DEBUG_CHECK_SIZES(vector.Size() != tensor.GetSize0(), "vector and tensor dimensions must be the same"); + + for (size_t i = 0; i < tensor.GetSize2(); ++i) + { + auto M = tensor.GetPrimarySlice(i); + for (size_t j = 0; j < tensor.GetSize1(); ++j) + { + auto u = M.GetColumn(j); + AddUpdate(vector, u); + } + } + } + + template + void ScaleAddUpdate(UnorientedConstVectorBase scale, UnorientedConstVectorBase bias, TensorReference tensor) + { + DEBUG_CHECK_SIZES(scale.Size() != tensor.GetSize2(), "vector and tensor dimensions must be the same"); + for (size_t i = 0; i < scale.Size(); ++i) + { + ScaleAddUpdate(scale[i], OnesMatrix(), bias[i], tensor.template GetSlice(i)); + } + } + + template + void ScaleAddUpdate(UnorientedConstVectorBase scale, UnorientedConstVectorBase bias, TensorReference tensor) + { + DEBUG_CHECK_SIZES(scale.Size() != tensor.GetSize1(), "vector and tensor dimensions must be the same"); + for (size_t i = 0; i < scale.Size(); ++i) + { + ScaleAddUpdate(scale[i], OnesMatrix(), bias[i], tensor.template GetSlice(i)); + } + } + + template + void ScaleAddUpdate(UnorientedConstVectorBase scale, UnorientedConstVectorBase bias, TensorReference tensor) + { + DEBUG_CHECK_SIZES(scale.Size() != tensor.GetSize0() || bias.Size() != tensor.GetSize0(), "vectors and tensor dimensions must be the same"); + + for (size_t i = 0; i < tensor.GetSize2(); ++i) + { + auto M = tensor.GetPrimarySlice(i); + for (size_t j = 0; j < tensor.GetSize1(); ++j) + { + auto u = M.GetColumn(j); + for (size_t k = 0; k < tensor.GetSize0(); ++k) + { + u[k] = scale[k] * u[k] + bias[k]; + } + } + } + } +} // namespace math +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/math/include/Vector.h b/libraries/math/include/Vector.h index dab01bf93..2e9db6481 100644 --- a/libraries/math/include/Vector.h +++ b/libraries/math/include/Vector.h @@ -227,7 +227,7 @@ namespace math /// A reference to the transpose of this vector. auto Transpose() const -> ConstVectorReference::value> { - // STYLE intentional deviation from project style - long implementation should be in tcc file + // STYLE intentional deviation from project style - long implementation should be in the implementation region return ConstVectorReference::value>(this->GetConstDataPointer(), this->Size(), this->GetIncrement()); } @@ -329,7 +329,7 @@ namespace math /// A reference to the transpose of this vector. 
auto Transpose() -> VectorReference::value> { - // STYLE intentional deviation from project style - long implementation should be in tcc file + // STYLE intentional deviation from project style - long implementation should be in the implementation region return VectorReference::value>(this->GetDataPointer(), this->Size(), this->GetIncrement()); } @@ -498,4 +498,433 @@ namespace math } // namespace math } // namespace ell -#include "../tcc/Vector.tcc" +#pragma region implementation + +#include +#include + +namespace ell +{ +namespace math +{ + // + // UnorientedConstVectorBase + // + + template + UnorientedConstVectorBase::UnorientedConstVectorBase(const ElementType* pData, size_t size, size_t increment) : + _pData(pData), + _size(size), + _increment(increment) + { + } + + template + const ElementType& UnorientedConstVectorBase::operator[](size_t index) const + { + DEBUG_THROW(index >= _size, utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds vector size.")); + + return GetConstDataPointer()[index * _increment]; + } + + template + void UnorientedConstVectorBase::Swap(UnorientedConstVectorBase& other) + { + std::swap(_pData, other._pData); + std::swap(_size, other._size); + std::swap(_increment, other._increment); + } + + template + ElementType UnorientedConstVectorBase::Norm0() const + { + return Aggregate([](ElementType x) { return x != 0 ? 1 : 0; }); + } + + template + ElementType UnorientedConstVectorBase::Norm1() const + { + return Aggregate([](ElementType x) { return std::abs(x); }); + } + + template + ElementType UnorientedConstVectorBase::Norm2() const + { + return std::sqrt(Norm2Squared()); + } + + template + ElementType UnorientedConstVectorBase::Norm2Squared() const + { + return Aggregate([](ElementType x) { return x * x; }); + } + + template + ElementType UnorientedConstVectorBase::NormInfinity() const + { + if (_size == 0) + { + return 0; + } + + const ElementType* pData = GetConstDataPointer(); + const ElementType* pEnd = pData + _size * _increment; + ElementType result = *pData; + pData += _increment; + + while (pData < pEnd) + { + result = std::max(result, std::abs(*pData)); + pData += _increment; + } + + return result; + } + + template + template + ElementType UnorientedConstVectorBase::Aggregate(MapperType mapper) const + { + ElementType result = 0; + const ElementType* current = GetConstDataPointer(); + const ElementType* end = current + _size * _increment; + while (current < end) + { + result += mapper(*current); + current += _increment; + } + return result; + } + + template + std::vector UnorientedConstVectorBase::ToArray() const + { + std::vector result(_size); + + const ElementType* pData = GetConstDataPointer(); + for (size_t i = 0; i < _size; ++i, pData += _increment) + result[i] = *pData; + + return result; + } + + // + // ConstVectorReference + // + + template + ConstVectorReference::ConstVectorReference(const ElementType* pData, size_t size, size_t increment) : + UnorientedConstVectorBase(pData, size, increment) + { + } + + template + void ConstVectorReference::Swap(ConstVectorReference& other) + { + UnorientedConstVectorBase::Swap(other); + } + + template + bool ConstVectorReference::IsEqual(ConstVectorReference other, ElementType tolerance) const + { + if (this->Size() != other.Size()) + { + return false; + } + + const ElementType* pThis = this->GetConstDataPointer(); + const ElementType* pThisEnd = pThis + this->Size() * this->GetIncrement(); + const ElementType* pOther = other.GetConstDataPointer(); + + while (pThis < 
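+ // Norm0/Norm1/Norm2Squared above are all one strided Aggregate pass with a
+ // different element mapper. Worked example (hypothetical values):
+ //
+ //     math::RowVector<double> v{ 1, -2, 2 };
+ //     v.Norm0();    // 3: count of nonzero entries
+ //     v.Norm1();    // 5: sum of absolute values
+ //     v.Norm2();    // 3: sqrt(1 + 4 + 4)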
pThisEnd) + { + auto diff = (*pThis) - (*pOther); + + if (diff > tolerance || -diff > tolerance) + { + return false; + } + pThis += this->GetIncrement(); + pOther += other.GetIncrement(); + } + return true; + } + + template + bool ConstVectorReference::operator==(const ConstVectorReference& other) const + { + return IsEqual(other); + } + + template + bool ConstVectorReference::operator!=(const ConstVectorReference& other) const + { + return !(*this == other); + } + + template + ConstVectorReference ConstVectorReference::GetSubVector(size_t offset, size_t size) const + { + DEBUG_THROW(offset + size > this->Size(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "subvector offset + subvector size exceeds vector size.")); + + return ConstVectorReference(this->GetConstDataPointer() + offset * this->GetIncrement(), size, this->GetIncrement()); + } + + // + // VectorReference + // + + template + VectorReference::VectorReference(const ElementType* pData, size_t size, size_t increment) : + ConstVectorReference(pData, size, increment) + { + } + + template + ElementType& VectorReference::operator[](size_t index) + { + DEBUG_THROW(index >= this->Size(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds vector size.")); + + return GetDataPointer()[index * this->GetIncrement()]; + } + + template + void VectorReference::Swap(VectorReference& other) + { + ConstVectorReference::Swap(other); + } + + template + template + void VectorReference::CopyFrom(ConstVectorReference other) + { + if (this->Size() != other.Size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "this vector and other vector are not the same size."); + } + + ElementType* pData = GetDataPointer(); + const OtherElementType* pOtherData = other.GetConstDataPointer(); + const size_t otherIncrement = other.GetIncrement(); + const OtherElementType* pOtherEnd = pOtherData + otherIncrement * other.Size(); + + if (this->GetIncrement() == 1 && otherIncrement == 1) + { + while (pOtherData < pOtherEnd) + { + (*pData) = static_cast(*pOtherData); + ++pData; + ++pOtherData; + } + } + else + { + while (pOtherData < pOtherEnd) + { + (*pData) = static_cast(*pOtherData); + pData += this->GetIncrement(); + pOtherData += otherIncrement; + } + } + } + + template + void VectorReference::Reset() + { + Fill(0); + } + + template + void VectorReference::Fill(ElementType value) + { + ElementType* data = GetDataPointer(); + ElementType* end = data + this->Size() * this->GetIncrement(); + + if (this->IsContiguous()) + { + std::fill(data, end, value); + } + else + { + while (data < end) + { + *data = value; + data += this->GetIncrement(); + } + } + } + + template + template + void VectorReference::Generate(GeneratorType generator) + { + ElementType* data = GetDataPointer(); + ElementType* end = data + this->Size() * this->GetIncrement(); + + while (data < end) + { + *data = static_cast(generator()); + data += this->GetIncrement(); + } + } + + template + template + void VectorReference::Transform(TransformationType transformation) + { + ElementType* pData = this->GetDataPointer(); + const ElementType* pEnd = pData + this->Size() * this->GetIncrement(); + while (pData < pEnd) + { + *pData = transformation(*pData); + pData += this->GetIncrement(); + } + } + + template + VectorReference VectorReference::GetReference() + { + return VectorReference(GetDataPointer(), this->Size(), this->GetIncrement()); + } + + template + VectorReference 
VectorReference::GetSubVector(size_t offset, size_t size) + { + DEBUG_THROW(offset + size > this->Size(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "subvector offset + subvector size exceeds vector size.")); + + return VectorReference(GetDataPointer() + offset * this->GetIncrement(), size, this->GetIncrement()); + } + + // + // Vector + // + + template + Vector::Vector(size_t size) : + VectorReference(nullptr, size, 1), + _data(size) + { + this->_pData = _data.data(); + } + + template + Vector::Vector(std::vector data) : + VectorReference(nullptr, data.size(), 1), + _data(std::move(data)) + { + this->_pData = _data.data(); + } + + template + Vector::Vector(std::initializer_list list) : + VectorReference(nullptr, list.size(), 1), + _data(list.begin(), list.end()) + { + this->_pData = _data.data(); + } + + template + Vector::Vector(Vector&& other) : + VectorReference(nullptr, other.Size(), other.GetIncrement()), + _data(std::move(other._data)) + { + this->_pData = _data.data(); + } + + template + Vector::Vector(const Vector& other) : + VectorReference(nullptr, other.Size(), 1), + _data(other.Size()) + { + _pData = _data.data(); + this->CopyFrom(other); + } + + template + Vector::Vector(ConstVectorReference& other) : + VectorReference(nullptr, other.Size(), 1), + _data(other.Size()) + { + _pData = _data.data(); + this->CopyFrom(other); + } + + template + Vector::Vector(ConstVectorReference::value>& other) : + VectorReference(nullptr, other.Size(), 1), + _data(other.Size()) + { + _pData = _data.data(); + this->CopyFrom(other); + } + + template + void Vector::Resize(size_t size) + { + _data.resize(size); + this->_pData = _data.data(); + this->_size = size; + } + + template + Vector& Vector::operator=(Vector other) + { + Swap(other); + return *this; + } + + template + void Vector::Swap(Vector& other) + { + VectorReference::Swap(other); + std::swap(_data, other._data); + } + + template + utilities::StlStridedIterator::iterator> begin(Vector& vector) + { + return { vector._data.begin(), static_cast(vector.GetIncrement()) }; + } + + template + utilities::StlStridedIterator::const_iterator> begin(const Vector& vector) + { + return { vector._data.cbegin(), static_cast(vector.GetIncrement()) }; + } + + template + utilities::StlStridedIterator::iterator> end(Vector& vector) + { + return { vector._data.end(), static_cast(vector.GetIncrement()) }; + } + + template + utilities::StlStridedIterator::const_iterator> end(const Vector& vector) + { + return { vector._data.cend(), static_cast(vector.GetIncrement()) }; + } + + // + // VectorArchiver + // + template + void VectorArchiver::Write(const Vector& vector, const std::string& name, utilities::Archiver& archiver) + { + archiver[name] << vector.ToArray(); + } + + template + void VectorArchiver::Read(Vector& vector, const std::string& name, utilities::Unarchiver& archiver) + { + std::vector values; + + archiver[name] >> values; + + Vector value(std::move(values)); + + vector.Swap(value); + } +} // namespace math +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/math/include/VectorOperations.h b/libraries/math/include/VectorOperations.h index 966aacbbf..e54c01d19 100644 --- a/libraries/math/include/VectorOperations.h +++ b/libraries/math/include/VectorOperations.h @@ -594,4 +594,831 @@ namespace math } // namespace math } // namespace ell -#include "../tcc/VectorOperations.tcc" +#pragma region implementation + +#include +#include + +namespace ell +{ +namespace math +{ + template + void 
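+ // The begin/end overloads above expose a Vector to STL algorithms through
+ // StlStridedIterator, so standard algorithms honor the increment. Sketch
+ // (the std::accumulate usage is an assumption about iterator conformance):
+ //
+ //     math::ColumnVector<double> v{ 1, 2, 3 };
+ //     double sum = std::accumulate(begin(v), end(v), 0.0);   // 6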
Print(ConstVectorReference vector, std::ostream& stream, size_t indent, size_t maxElements) + { + DEBUG_CHECK_SIZES(maxElements < 3, "cannot specify maxElements below 3."); + + stream << std::string(indent, ' '); + if (vector.Size() == 0) + { + stream << "{ }"; + } + else if (vector.Size() <= maxElements) + { + stream << "{ " << vector[0]; + for (size_t i = 1; i < vector.Size(); ++i) + { + stream << ", " << vector[i]; + } + stream << " }"; + } + else + { + stream << "{ " << vector[0]; + for (size_t i = 1; i < maxElements - 2; ++i) + { + stream << ", " << vector[i]; + } + stream << ", ..., " << vector[vector.Size() - 1] << " }"; + } + } + + template + std::ostream& operator<<(std::ostream& stream, ConstVectorReference vector) + { + Print(vector, stream); + return stream; + } + + template + TransformedConstVectorReference::TransformedConstVectorReference(ConstVectorReference vector, TransformationType transformation) : + _vector(vector), + _transformation(std::move(transformation)) + { + } + + template + TransformedConstVectorReference TransformVector(ConstVectorReference vector, TransformationType transformation) + { + return TransformedConstVectorReference(vector, transformation); + } + + template + auto Square(ConstVectorReference vector) -> TransformedConstVectorReference> + { + return TransformVector(vector, SquareTransformation); + } + + template + auto Sqrt(ConstVectorReference vector) -> TransformedConstVectorReference> + { + return TransformVector(vector, SquareRootTransformation); + } + + template + auto Abs(ConstVectorReference vector) -> TransformedConstVectorReference> + { + return TransformVector(vector, AbsoluteValueTransformation); + } + + template + ElementType ScaleFunction::operator()(ElementType x) + { + return x * _value; + } + + template + auto operator*(double scalar, ConstVectorReference vector) -> TransformedConstVectorReference> + { + ScaleFunction transformation{ static_cast(scalar) }; + return TransformVector(vector, transformation); + } + + template concept> + void operator+=(VectorReference vector, ScalarElementType scalar) + { + AddUpdate(static_cast(scalar), vector); + } + + template + void operator+=(VectorReference vectorB, ConstVectorReference vectorA) + { + AddUpdate(vectorA, vectorB); + } + + template + void operator+=(VectorReference vector, TransformedConstVectorReference transformedVector) + { + TransformAddUpdate(transformedVector.GetTransformation(), transformedVector.GetVector(), vector); + } + + template concept> + void operator-=(VectorReference vector, ScalarElementType scalar) + { + AddUpdate(static_cast(-scalar), vector); + } + + template + void operator-=(VectorReference vectorB, ConstVectorReference vectorA) + { + ScaleAddUpdate(static_cast(-1), vectorA, One(), vectorB); + } + + template concept> + void operator*=(VectorReference vector, ScalarElementType scalar) + { + ScaleUpdate(static_cast(scalar), vector); + } + + template concept> + void operator/=(VectorReference vector, ScalarElementType scalar) + { + DEBUG_THROW(scalar == 0, utilities::NumericException(utilities::NumericExceptionErrors::divideByZero, "Divide by zero.")); + + ScaleUpdate(1 / static_cast(scalar), vector); + } + + // vector += scalar + template + void AddUpdate(ElementType scalar, VectorReference vector) + { + if (scalar == 0) + { + return; + } + else + { + Internal::VectorOperations::AddUpdate(scalar, vector); + } + } + + // vectorB += vectorA + template + void AddUpdate(ConstVectorReference vectorA, VectorReference vectorB) + { + DEBUG_CHECK_SIZES(vectorB.Size() != 
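+ // TransformedConstVectorReference above makes expressions like v += Square(u)
+ // single-pass: the mapper runs per element inside the update, and no
+ // temporary vector is allocated. Sketch (hypothetical values):
+ //
+ //     math::RowVector<double> u{ 1, 2 }, v{ 0, 0 };
+ //     v += math::Square(u);   // v == { 1, 4 }
+ //     v += 2.0 * u;           // ScaleFunction path: v == { 3, 8 }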
vectorA.Size(), "Incompatible vector sizes."); + + Internal::VectorOperations::AddUpdate(vectorA, vectorB); + } + + // output = scalar + vector + template + void AddSet(ElementType scalar, ConstVectorReference vector, VectorReference output) + { + if (scalar == 0) + { + output.CopyFrom(vector); + } + else + { + Internal::VectorOperations::AddSet(scalar, vector, output); + } + } + + // output = vectorA + vectorB + template + void AddSet(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference output) + { + DEBUG_CHECK_SIZES(vectorA.Size() != vectorB.Size(), "Incompatible vector sizes."); + + Internal::VectorOperations::AddSet(vectorA, vectorB, output); + } + + // vector *= scalar + template + void ScaleUpdate(ElementType scalar, VectorReference vector) + { + if (scalar == 1) + { + return; + } + else if (scalar == 0) + { + vector.Reset(); + } + else + { + Internal::VectorOperations::ScaleUpdate(scalar, vector); + } + } + + // output = scalar * vector + template + void ScaleSet(ElementType scalar, ConstVectorReference vector, VectorReference output) + { + DEBUG_CHECK_SIZES(vector.Size() != output.Size(), "Incompatible vector sizes."); + + if (scalar == 1) + { + output.CopyFrom(vector); + } + else if (scalar == 0) + { + output.Reset(); + } + else + { + Internal::VectorOperations::ScaleSet(scalar, vector, output); + } + } + + // vectorB += scalarA * vectorA + template + void ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, One, VectorReference vectorB) + { + DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size(), "Incompatible vector sizes."); + + if (scalarA == 0) + { + return; + } + else if (scalarA == 1) + { + AddUpdate(vectorA, vectorB); + } + else + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, vectorA, One(), vectorB); + } + } + + // vectorB = scalarA + scalarB * vectorB + template + void ScaleAddUpdate(ElementType scalarA, OnesVector, ElementType scalarB, VectorReference vectorB) + { + if (scalarA == 0) + { + ScaleUpdate(scalarB, vectorB); + } + else if (scalarB == 0) + { + vectorB.Fill(scalarA); + } + else if (scalarB == 1) + { + Internal::VectorOperations::AddUpdate(scalarA, vectorB); + } + else + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, OnesVector(), scalarB, vectorB); + } + } + + // vectorB = vectorA + scalarB * vectorB + template + void ScaleAddUpdate(One, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) + { + DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size(), "Incompatible vector sizes."); + + if (scalarB == 0) + { + vectorB.CopyFrom(vectorA); + } + else if (scalarB == 1) + { + Internal::VectorOperations::AddUpdate(vectorA, vectorB); + } + else + { + Internal::VectorOperations::ScaleAddUpdate(One(), vectorA, scalarB, vectorB); + } + } + + // vectorB = scalarA * vectorA + scalarB * vectorB + template + void ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) + { + DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size(), "Incompatible vector sizes."); + + if (scalarA == 0) + { + ScaleUpdate(scalarB, vectorB); + } + else if (scalarA == 1) + { + ScaleAddUpdate(One(), vectorA, scalarB, vectorB); + } + else if (scalarB == 0) + { + Internal::VectorOperations::ScaleSet(scalarA, vectorA, vectorB); + } + else if (scalarB == 1) + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, vectorA, One(), vectorB); + } + else + { + Internal::VectorOperations::ScaleAddUpdate(scalarA, vectorA, scalarB, vectorB); + } + } + + // output = scalarA * vectorA + 
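Several ScaleAddUpdate overloads above take the empty tag types One and OnesVector in place of a scalar or vector operand, so "the scalar is exactly one" or "the vector is all ones" is resolved by overload selection at compile time rather than by a runtime branch. A standalone sketch of the trick (illustrative, contiguous double vectors only):

    #include <cstddef>
    #include <vector>

    struct One {};        // tag: a scalar known to be exactly 1
    struct OnesVector {}; // tag: an operand known to be a vector of all ones

    // b += s * a (general case)
    void ScaleAddUpdate(double s, const std::vector<double>& a, One, std::vector<double>& b)
    {
        for (std::size_t i = 0; i < b.size(); ++i) b[i] += s * a[i];
    }

    // b += s * ones: the all-ones operand degenerates to a scalar shift
    void ScaleAddUpdate(double s, OnesVector, One, std::vector<double>& b)
    {
        for (std::size_t i = 0; i < b.size(); ++i) b[i] += s;
    }
    // Overload resolution, not an if, picks the cheaper loop:
    //   ScaleAddUpdate(2.0, OnesVector(), One(), b);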
vectorB
+ template
+ void ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, One, ConstVectorReference vectorB, VectorReference output)
+ {
+ DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size() || vectorA.Size() != output.Size(), "Incompatible vector sizes.");
+
+ if (scalarA == 0)
+ {
+ output.CopyFrom(vectorB);
+ }
+ else if (scalarA == 1)
+ {
+ Internal::VectorOperations::AddSet(vectorA, vectorB, output);
+ }
+ else
+ {
+ Internal::VectorOperations::ScaleAddSet(scalarA, vectorA, One(), vectorB, output);
+ }
+ }
+
+ // output = scalarA + scalarB * vectorB
+ template
+ void ScaleAddSet(ElementType scalarA, OnesVector, ElementType scalarB, ConstVectorReference vectorB, VectorReference output)
+ {
+ DEBUG_CHECK_SIZES(vectorB.Size() != output.Size(), "Incompatible vector sizes.");
+
+ if (scalarA == 0)
+ {
+ ScaleSet(scalarB, vectorB, output);
+ }
+ else if (scalarB == 0)
+ {
+ output.Fill(scalarA);
+ }
+ else if (scalarB == 1)
+ {
+ Internal::VectorOperations::AddSet(scalarA, vectorB, output);
+ }
+ else
+ {
+ Internal::VectorOperations::ScaleAddSet(scalarA, OnesVector(), scalarB, vectorB, output);
+ }
+ }
+
+ // output = vectorA + scalarB * vectorB
+ template
+ void ScaleAddSet(One, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output)
+ {
+ DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size() || vectorA.Size() != output.Size(), "Incompatible vector sizes.");
+
+ if (scalarB == 0)
+ {
+ output.CopyFrom(vectorA);
+ }
+ else if (scalarB == 1)
+ {
+ Internal::VectorOperations::AddSet(vectorA, vectorB, output);
+ }
+ else
+ {
+ Internal::VectorOperations::ScaleAddSet(One(), vectorA, scalarB, vectorB, output);
+ }
+ }
+
+ // output = scalarA * vectorA + scalarB * vectorB
+ template
+ void ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output)
+ {
+ DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size() || vectorB.Size() != output.Size(), "Incompatible vector sizes.");
+
+ if (scalarA == 0)
+ {
+ ScaleSet(scalarB, vectorB, output);
+ }
+ else if (scalarA == 1)
+ {
+ ScaleAddSet(One(), vectorA, scalarB, vectorB, output);
+ }
+ else if (scalarB == 0)
+ {
+ Internal::VectorOperations::ScaleSet(scalarA, vectorA, output);
+ }
+ else if (scalarB == 1)
+ {
+ Internal::VectorOperations::ScaleAddSet(scalarA, vectorA, One(), vectorB, output);
+ }
+ else
+ {
+ Internal::VectorOperations::ScaleAddSet(scalarA, vectorA, scalarB, vectorB, output);
+ }
+ }
+
+ template
+ void ElementwiseMultiplySet(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference vectorC)
+ {
+ DEBUG_CHECK_SIZES(vectorA.Size() != vectorB.Size() || vectorA.Size() != vectorC.Size(), "Incompatible vector sizes.");
+
+ const ElementType* pVectorAData = vectorA.GetConstDataPointer();
+ const ElementType* pVectorBData = vectorB.GetConstDataPointer();
+
+ size_t i = 0;
+ const ElementType* end = vectorA.GetConstDataPointer() + vectorA.GetIncrement() * vectorA.Size();
+
+ while (pVectorAData < end)
+ {
+ vectorC[i++] = (*pVectorAData) * (*pVectorBData);
+ pVectorAData += vectorA.GetIncrement();
+ pVectorBData += vectorB.GetIncrement();
+ }
+ }
+
+ template
+ void InnerProduct(ConstRowVectorReference vectorA, ConstColumnVectorReference vectorB, ElementType& result)
+ {
+ DEBUG_CHECK_SIZES(vectorA.Size() != vectorB.Size(), "Incompatible vector sizes.");
+
+ Internal::VectorOperations::InnerProduct(vectorA, vectorB, result);
+ }
+
+ template
+ ElementType operator*(ConstRowVectorReference 
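ElementwiseMultiplySet above advances each operand by its own increment, so the one loop serves contiguous vectors and strided views (matrix rows, columns, subvectors) alike. The same traversal over raw pointers, as a standalone sketch:

    #include <cstddef>

    // c[i] = a[i] * b[i], where each operand advances by its own stride, so the
    // same loop handles contiguous vectors and strided views alike.
    void ElementwiseMultiply(const double* a, std::size_t aInc,
                             const double* b, std::size_t bInc,
                             double* c, std::size_t cInc, std::size_t size)
    {
        const double* end = a + aInc * size;
        while (a < end)
        {
            *c = (*a) * (*b);
            a += aInc;
            b += bInc;
            c += cInc;
        }
    }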
vectorA, ConstColumnVectorReference vectorB) + { + ElementType result; + InnerProduct(vectorA, vectorB, result); + return result; + } + + template + ElementType Dot(UnorientedConstVectorBase vectorA, UnorientedConstVectorBase vectorB) + { + ConstRowVectorReference rowVector(vectorA.GetConstDataPointer(), vectorA.Size(), vectorA.GetIncrement()); + ConstColumnVectorReference columnVector(vectorB.GetConstDataPointer(), vectorB.Size(), vectorB.GetIncrement()); + + ElementType result; + InnerProduct(rowVector, columnVector, result); + return result; + } + + template + void OuterProduct(ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) + { + DEBUG_CHECK_SIZES(vectorA.Size() != matrix.NumRows() || vectorB.Size() != matrix.NumColumns(), "Incompatible vector matrix sizes."); + + Internal::VectorOperations::OuterProduct(vectorA, vectorB, matrix); + } + + template + void CumulativeSumUpdate(VectorReference vector) + { + ElementType* pData = vector.GetDataPointer(); + const ElementType* pEnd = pData + vector.GetIncrement() * vector.Size(); + ElementType sum = (*pData); + pData += vector.GetIncrement(); + + while (pData < pEnd) + { + sum += (*pData); + (*pData) = sum; + pData += vector.GetIncrement(); + } + } + + template + void ConsecutiveDifferenceUpdate(VectorReference vector) + { + ElementType* pData = vector.GetDataPointer(); + const ElementType* pEnd = pData + vector.GetIncrement() * vector.Size(); + ElementType previous = (*pData); + pData += vector.GetIncrement(); + + while (pData < pEnd) + { + ElementType sum = (*pData); + (*pData) -= previous; + previous = sum; + pData += vector.GetIncrement(); + } + } + + template + void TransformUpdate(TransformationType transformation, VectorReference vector) + { + vector.Transform(transformation); + } + + template + void TransformSet(TransformationType transformation, ConstVectorReference vector, VectorReference output) + { + DEBUG_CHECK_SIZES(vector.Size() != output.Size(), "Incompatible vector sizes."); + + ElementType* pOutputData = output.GetDataPointer(); + const ElementType* pVectorData = vector.GetConstDataPointer(); + const ElementType* pOutputEnd = pOutputData + output.Size() * output.GetIncrement(); + while (pOutputData < pOutputEnd) + { + *pOutputData = transformation(*pVectorData); + pOutputData += output.GetIncrement(); + pVectorData += vector.GetIncrement(); + } + } + + template + void TransformAddUpdate(TransformationType transformation, ConstVectorReference vectorA, VectorReference vectorB) + { + DEBUG_CHECK_SIZES(vectorA.Size() != vectorB.Size(), "Incompatible vector sizes."); + + ElementType* pVectorBData = vectorB.GetDataPointer(); + const ElementType* pVectorAData = vectorA.GetConstDataPointer(); + const ElementType* pVectorBEnd = pVectorBData + vectorB.Size() * vectorB.GetIncrement(); + while (pVectorBData < pVectorBEnd) + { + *pVectorBData += transformation(*pVectorAData); + pVectorBData += vectorB.GetIncrement(); + pVectorAData += vectorA.GetIncrement(); + } + } + + // + // NativeVectorOperations + // + namespace Internal + { + template + void VectorOperations::InnerProduct(ConstRowVectorReference vectorA, ConstColumnVectorReference vectorB, ElementType& result) + { + const ElementType* pVectorAData = vectorA.GetConstDataPointer(); + const ElementType* pVectorBData = vectorB.GetConstDataPointer(); + const ElementType* pVectorAEnd = pVectorAData + vectorA.GetIncrement() * vectorA.Size(); + result = 0; + + while (pVectorAData < pVectorAEnd) + { + result += (*pVectorAData) * (*pVectorBData); + 
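CumulativeSumUpdate and ConsecutiveDifferenceUpdate above are mutual inverses. A contiguous (increment one) sketch of the pair; the backward loop in the difference is equivalent to the original's forward walk with a saved "previous" value:

    #include <cstddef>
    #include <vector>

    void CumulativeSum(std::vector<double>& v)
    {
        for (std::size_t i = 1; i < v.size(); ++i) v[i] += v[i - 1];
    }

    void ConsecutiveDifference(std::vector<double>& v)
    {
        if (v.size() < 2) return;
        for (std::size_t i = v.size() - 1; i > 0; --i) v[i] -= v[i - 1];
    }

    // { 1, 2, 3 } --CumulativeSum--> { 1, 3, 6 } --ConsecutiveDifference--> { 1, 2, 3 }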
pVectorAData += vectorA.GetIncrement();
+ pVectorBData += vectorB.GetIncrement();
+ }
+ }
+
+ template
+ void VectorOperations::OuterProduct(ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix)
+ {
+ for (size_t i = 0; i < matrix.NumRows(); ++i)
+ {
+ for (size_t j = 0; j < matrix.NumColumns(); ++j)
+ {
+ matrix(i, j) = vectorA[i] * vectorB[j];
+ }
+ }
+ }
+
+ template
+ void UnaryVectorUpdateImplementation(VectorReference vector, UnaryOperation unaryOperation)
+ {
+ ElementType* pData = vector.GetDataPointer();
+ const ElementType* pEnd = pData + vector.GetIncrement() * vector.Size();
+
+ while (pData < pEnd)
+ {
+ unaryOperation(*pData);
+ pData += vector.GetIncrement();
+ }
+ }
+
+ template
+ void BinaryVectorUpdateImplementation(ConstVectorReference vectorA, VectorReference vectorB, BinaryOperation binaryOperation)
+ {
+ ElementType* pVectorBData = vectorB.GetDataPointer();
+ const ElementType* pVectorAData = vectorA.GetConstDataPointer();
+ const ElementType* pVectorBEnd = pVectorBData + vectorB.GetIncrement() * vectorB.Size();
+
+ while (pVectorBData < pVectorBEnd)
+ {
+ binaryOperation(*pVectorAData, *pVectorBData);
+ pVectorBData += vectorB.GetIncrement();
+ pVectorAData += vectorA.GetIncrement();
+ }
+ }
+
+ template
+ void TrinaryVectorUpdateImplementation(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference output, TrinaryOperation trinaryOperation)
+ {
+ ElementType* pOutputData = output.GetDataPointer();
+ const ElementType* pVectorAData = vectorA.GetConstDataPointer();
+ const ElementType* pVectorBData = vectorB.GetConstDataPointer();
+ const ElementType* pOutputEnd = pOutputData + output.GetIncrement() * output.Size();
+
+ while (pOutputData < pOutputEnd)
+ {
+ trinaryOperation(*pVectorAData, *pVectorBData, *pOutputData);
+ pVectorAData += vectorA.GetIncrement();
+ pVectorBData += vectorB.GetIncrement();
+ pOutputData += output.GetIncrement();
+ }
+ }
+
+ template
+ void VectorOperations::AddUpdate(ElementType scalar, VectorReference vector)
+ {
+ UnaryVectorUpdateImplementation(vector, [scalar](ElementType& v) { v += scalar; });
+ }
+
+ // vectorB += vectorA
+ template
+ void VectorOperations::AddUpdate(ConstVectorReference vectorA, VectorReference vectorB)
+ {
+ BinaryVectorUpdateImplementation(vectorA, vectorB, [](ElementType a, ElementType& b) { b += a; });
+ }
+
+ // output = scalar + vector
+ template
+ void VectorOperations::AddSet(ElementType scalar, ConstVectorReference vector, VectorReference output)
+ {
+ BinaryVectorUpdateImplementation(vector, output, [scalar](ElementType a, ElementType& o) { o = scalar + a; });
+ }
+
+ // output = vectorA + vectorB
+ template
+ void VectorOperations::AddSet(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference output)
+ {
+ TrinaryVectorUpdateImplementation(vectorA, vectorB, output, [](ElementType a, ElementType b, ElementType& o) { o = a + b; });
+ }
+
+ // vector *= scalar
+ template
+ void VectorOperations::ScaleUpdate(ElementType scalar, VectorReference vector)
+ {
+ UnaryVectorUpdateImplementation(vector, [scalar](ElementType& v) { v *= scalar; });
+ }
+
+ // output = scalar * vector
+ template
+ void VectorOperations::ScaleSet(ElementType scalar, ConstVectorReference vector, VectorReference output)
+ {
+ BinaryVectorUpdateImplementation(vector, output, [scalar](ElementType a, ElementType& o) { o = scalar * a; });
+ }
+
+ // vectorB += scalarA * vectorA
+ template
+ void VectorOperations::ScaleAddUpdate(ElementType scalarA, 
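Every named kernel in the native Internal::VectorOperations path above funnels through the three strided loop helpers and a one-line lambda. A standalone sketch of the binary flavor, assuming raw pointers and double elements:

    #include <cstddef>

    // One strided loop, parameterized by a callable; named kernels become one-liners.
    template <typename BinaryOperation>
    void BinaryUpdate(const double* a, std::size_t aInc, double* b, std::size_t bInc,
                      std::size_t size, BinaryOperation op)
    {
        const double* bEnd = b + bInc * size;
        while (b < bEnd)
        {
            op(*a, *b);
            a += aInc;
            b += bInc;
        }
    }

    // b += s * a, expressed as a lambda over the shared loop
    void ScaleAdd(double s, const double* a, double* b, std::size_t size)
    {
        BinaryUpdate(a, 1, b, 1, size, [s](double x, double& y) { y += s * x; });
    }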
ConstVectorReference vectorA, One, VectorReference vectorB) + { + BinaryVectorUpdateImplementation(vectorA, vectorB, [scalarA](ElementType a, ElementType& b) { b += scalarA * a; }); + } + + // vectorB = scalarA + scalarB * vectorB + template + void VectorOperations::ScaleAddUpdate(ElementType scalarA, OnesVector, ElementType scalarB, VectorReference vectorB) + { + UnaryVectorUpdateImplementation(vectorB, [scalarA, scalarB](ElementType& b) { b = scalarA + scalarB * b; }); + } + + // vectorB = vectorA + scalarB * vectorB + template + void VectorOperations::ScaleAddUpdate(One, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) + { + BinaryVectorUpdateImplementation(vectorA, vectorB, [scalarB](ElementType a, ElementType& b) { b = a + scalarB * b; }); + } + + // vectorB = scalarA * vectorA + scalarB * vectorB + template + void VectorOperations::ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) + { + BinaryVectorUpdateImplementation(vectorA, vectorB, [scalarA, scalarB](ElementType a, ElementType& b) { b = scalarA * a + scalarB * b; }); + } + + // output = scalarA * vectorA + vectorB + template + void VectorOperations::ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, One, ConstVectorReference vectorB, VectorReference output) + { + TrinaryVectorUpdateImplementation(vectorA, vectorB, output, [scalarA](ElementType a, ElementType b, ElementType& o) { o = scalarA * a + b; }); + } + + // output = scalarA * ones + scalarB * vectorB + template + void VectorOperations::ScaleAddSet(ElementType scalarA, OnesVector, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) + { + BinaryVectorUpdateImplementation(vectorB, output, [scalarA, scalarB](ElementType b, ElementType& o) { o = scalarA + scalarB * b; }); + } + + // vectorB = vectorA + scalarB * vectorB + template + void VectorOperations::ScaleAddSet(One, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) + { + TrinaryVectorUpdateImplementation(vectorA, vectorB, output, [scalarB](ElementType a, ElementType b, ElementType& o) { o = a + scalarB * b; }); + } + + // output = scalarA * vectorA + scalarB * vectorB + template + void VectorOperations::ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) + { + TrinaryVectorUpdateImplementation(vectorA, vectorB, output, [scalarA, scalarB](ElementType a, ElementType b, ElementType& o) { o = scalarA * a + scalarB * b; }); + } + +#ifdef USE_BLAS + // + // OpenBlasVectorOperations + // + + template + void VectorOperations::InnerProduct(ConstRowVectorReference vectorA, ConstColumnVectorReference vectorB, ElementType& result) + { + result = Blas::Dot(static_cast(vectorA.Size()), vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetConstDataPointer(), static_cast(vectorB.GetIncrement())); + } + + template + void VectorOperations::OuterProduct(ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) + { + matrix.Reset(); + Blas::Ger(matrix.GetLayout(), static_cast(matrix.NumRows()), static_cast(matrix.NumColumns()), static_cast(1.0), vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetConstDataPointer(), static_cast(vectorB.GetIncrement()), matrix.GetDataPointer(), static_cast(matrix.GetIncrement())); + } + + template + void VectorOperations::AddSet(ElementType scalar, 
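In the USE_BLAS section that begins above, the vector kernels map onto level-1 and level-2 BLAS routines (Dot, Ger, and below, Scal and Axpy); vectorB += scalarA * vectorA, for instance, becomes a single axpy. A sketch against the standard CBLAS prototype (as shipped by OpenBLAS, for example); ELL's Blas::Axpy wrapper is assumed to forward to something equivalent:

    #include <cstddef>

    // Standard CBLAS prototype (int indices on typical builds); the real wrapper
    // and its exact argument types live behind ELL's Blas::Axpy.
    extern "C" void cblas_daxpy(int n, double alpha, const double* x, int incx,
                                double* y, int incy);

    // b := scalarA * a + b, one library call instead of a hand-rolled loop
    void ScaleAddUpdateBlas(double scalarA, const double* a, int aInc,
                            double* b, int bInc, int size)
    {
        cblas_daxpy(size, scalarA, a, aInc, b, bInc);
    }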
ConstVectorReference vector, VectorReference output) + { + output.Fill(scalar); + AddUpdate(vector, output); + } + + template + void VectorOperations::AddSet(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference output) + { + output.CopyFrom(vectorA); + AddUpdate(vectorB, output); + } + + template + void VectorOperations::AddUpdate(ElementType scalar, VectorReference vector) + { + UnaryVectorUpdateImplementation(vector, [scalar](ElementType& v) { v += scalar; }); + } + + // vectorB += vectorA + template + void VectorOperations::AddUpdate(ConstVectorReference vectorA, VectorReference vectorB) + { + ScaleAddUpdate(static_cast(1.0), vectorA, One(), vectorB); + } + + // vector *= scalar + template + void VectorOperations::ScaleUpdate(ElementType scalar, VectorReference vector) + { + Blas::Scal(static_cast(vector.Size()), scalar, vector.GetDataPointer(), static_cast(vector.GetIncrement())); + } + + // output = scalar * vector + template + void VectorOperations::ScaleSet(ElementType scalar, ConstVectorReference vector, VectorReference output) + { + ScaleAddUpdate(scalar, vector, static_cast(0.0), output); + } + + // vectorB += scalarA * vectorA + template + void VectorOperations::ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, One, VectorReference vectorB) + { + Blas::Axpy(static_cast(vectorB.Size()), scalarA, vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); + } + + // vectorB = scalarA + scalarB * vectorB + template + void VectorOperations::ScaleAddUpdate(ElementType scalarA, OnesVector, ElementType scalarB, VectorReference vectorB) + { + Blas::Scal(static_cast(vectorB.Size()), scalarB, vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); + math::AddUpdate(scalarA, vectorB); + } + + // vectorB = vectorA + scalarB * vectorB + template + void VectorOperations::ScaleAddUpdate(One, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) + { + Blas::Scal(static_cast(vectorB.Size()), scalarB, vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); + AddUpdate(vectorA, vectorB); + } + + // vectorB = scalarA * vectorA + scalarB * vectorB + template + void VectorOperations::ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) + { + Blas::Scal(static_cast(vectorB.Size()), scalarB, vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); + Blas::Axpy(static_cast(vectorB.Size()), scalarA, vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); + } + + // output = scalarA * vectorA + vectorB + template + void VectorOperations::ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, One, ConstVectorReference vectorB, VectorReference output) + { + output.CopyFrom(vectorB); + ScaleAddUpdate(scalarA, vectorA, One(), output); + } + + // vectorC = scalarA * ones + scalarB * vectorB + template + void VectorOperations::ScaleAddSet(ElementType scalarA, OnesVector, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) + { + output.Fill(scalarA); + ScaleAddUpdate(scalarB, vectorB, One(), output); + } + + // output = vectorA + scalarB * vectorB + template + void VectorOperations::ScaleAddSet(One, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) + { + ScaleSet(scalarB, vectorB, output); + AddUpdate(vectorA, output); + } + + // vectorC = 
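Level-1 BLAS has no fused y = a*x + b*y routine, which is why the general ScaleAddUpdate above issues Scal followed by Axpy. The plain-loop equivalent of that composition:

    #include <cstddef>

    // y = a*x + b*y via two level-1 passes, mirroring Blas::Scal then Blas::Axpy.
    void ScaleThenAxpy(double a, const double* x, double b, double* y, std::size_t n)
    {
        for (std::size_t i = 0; i < n; ++i) y[i] *= b;        // Scal(b, y)
        for (std::size_t i = 0; i < n; ++i) y[i] += a * x[i]; // Axpy(a, x, y)
    }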
scalarA * vectorA + scalarB * vectorB + template + void VectorOperations::ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) + { + ScaleSet(scalarA, vectorA, output); + ScaleAddUpdate(scalarB, vectorB, One(), output); + } + +#endif // USE_BLAS + } // namespace Internal +} // namespace math +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/math/tcc/Matrix.tcc b/libraries/math/tcc/Matrix.tcc deleted file mode 100644 index 18ff7b37d..000000000 --- a/libraries/math/tcc/Matrix.tcc +++ /dev/null @@ -1,504 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Matrix.tcc (math) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include "../include/VectorOperations.h" - -#include -#include -#include - -#include - -namespace ell -{ -namespace math -{ - // - // CommonMatrixBase - // - - template - CommonMatrixBase::CommonMatrixBase(const ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : - _pData(pData), - _numRows(numRows), - _numColumns(numColumns), - _increment(increment) - { - } - - template - void CommonMatrixBase::Swap(CommonMatrixBase& other) - { - using std::swap; - swap(_pData, other._pData); - swap(_numRows, other._numRows); - swap(_numColumns, other._numColumns); - swap(_increment, other._increment); - } - - // - // MatrixBase - // - - // Row-major - template - MatrixBase::MatrixBase(const ElementType* pData, size_t numRows, size_t numColumns) : - CommonMatrixBase(pData, numRows, numColumns, numColumns) - { - } - - template - MatrixBase::MatrixBase(const ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : - CommonMatrixBase(pData, numRows, numColumns, increment) - { - } - - template - void MatrixBase::Swap(MatrixBase& other) - { - CommonMatrixBase::Swap(other); - } - - // Column-major - template - MatrixBase::MatrixBase(const ElementType* pData, size_t numRows, size_t numColumns) : - CommonMatrixBase(pData, numRows, numColumns, numRows) - { - } - - template - MatrixBase::MatrixBase(const ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : - CommonMatrixBase(pData, numRows, numColumns, increment) - { - } - - template - void MatrixBase::Swap(MatrixBase& other) - { - CommonMatrixBase::Swap(other); - } - - // - // ConstMatrixReference - // - template - ConstMatrixReference::ConstMatrixReference(const ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : - MatrixBase(pData, numRows, numColumns, increment) - { - } - - template - ConstMatrixReference::ConstMatrixReference(const ElementType* pData, size_t numRows, size_t numColumns) : - MatrixBase(pData, numRows, numColumns) - { - } - - template - ElementType ConstMatrixReference::operator()(size_t rowIndex, size_t columnIndex) const - { - using namespace std::string_literals; - DEBUG_THROW(rowIndex >= this->NumRows() || columnIndex >= this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "("s + std::to_string(rowIndex) + ", " + std::to_string(columnIndex) + ") exceeds matrix dimensions (" + std::to_string(this->NumRows()) + " x " + std::to_string(this->NumColumns()) + ".")); - - return GetConstDataPointer()[rowIndex * this->GetRowIncrement() + columnIndex * this->GetColumnIncrement()]; - } - - template - 
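The Matrix implementation being deleted here (inlined into its header, following the same pattern as VectorOperations.h above) addresses one flat buffer through row and column increments: row-major storage uses increments (numColumns, 1) and column-major uses (1, numRows). A standalone sketch of the indexing:

    #include <cstddef>

    // One flat buffer, two strides: row-major is (rowInc, colInc) = (numColumns, 1),
    // column-major is (1, numRows).
    inline double& At(double* data, std::size_t row, std::size_t col,
                      std::size_t rowInc, std::size_t colInc)
    {
        return data[row * rowInc + col * colInc];
    }
    // row-major 3x4 element (r, c):    At(p, r, c, 4, 1)
    // column-major 3x4 element (r, c): At(p, r, c, 1, 3)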
std::vector ConstMatrixReference::ToArray() const - { - std::vector v(this->Size()); - auto vIterator = v.begin(); - for (size_t i = 0; i < this->GetMinorSize(); ++i) - { - auto pIntervalData = GetMajorVectorBegin(i); - std::copy(pIntervalData, pIntervalData + this->GetMajorSize(), vIterator); - vIterator += this->GetMajorSize(); - } - return v; - } - - template - void ConstMatrixReference::Swap(ConstMatrixReference& other) - { - MatrixBase::Swap(other); - } - - template - bool ConstMatrixReference::IsEqual(ConstMatrixReference other, ElementType tolerance) const - { - if (this->NumRows() != other.NumRows() || this->NumColumns() != other.NumColumns()) - { - return false; - } - - for (size_t i = 0; i < this->GetMinorSize(); ++i) - { - if (!GetMajorVector(i).IsEqual(other.GetMajorVector(i), tolerance)) - { - return false; - } - } - return true; - } - - template - bool ConstMatrixReference::IsEqual(ConstMatrixReference::value> other, ElementType tolerance) const - { - if (this->NumRows() != other.NumRows() || this->NumColumns() != other.NumColumns()) - { - return false; - } - - for (size_t i = 0; i < this->NumRows(); ++i) - { - if (!GetRow(i).IsEqual(other.GetRow(i), tolerance)) - { - return false; - } - } - return true; - } - - template - bool ConstMatrixReference::operator==(const ConstMatrixReference& other) const - { - return IsEqual(other); - } - - template - bool ConstMatrixReference::operator==(const ConstMatrixReference::value>& other) const - { - return IsEqual(other); - } - - template - template - bool ConstMatrixReference::operator!=(const ConstMatrixReference& other) - { - return !(*this == other); - } - - template - ConstMatrixReference ConstMatrixReference::GetSubMatrix(size_t firstRow, size_t firstColumn, size_t numRows, size_t numColumns) const - { - DEBUG_THROW(firstRow + numRows > this->NumRows() || firstColumn + numColumns > this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "block exceeds matrix dimensions.")); - - return ConstMatrixReference(GetConstDataPointer() + firstRow * this->GetRowIncrement() + firstColumn * this->GetColumnIncrement(), numRows, numColumns, this->GetIncrement()); - } - - template - ConstColumnVectorReference ConstMatrixReference::GetColumn(size_t index) const - { - DEBUG_THROW(index >= this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "column index exceeds matrix dimensions.")); - - return ConstColumnVectorReference(GetConstDataPointer() + index * this->GetColumnIncrement(), this->NumRows(), this->GetRowIncrement()); - } - - template - ConstRowVectorReference ConstMatrixReference::GetRow(size_t index) const - { - DEBUG_THROW(index >= this->NumRows(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "row index exceeds matrix dimensions.")); - - return ConstRowVectorReference(GetConstDataPointer() + index * this->GetRowIncrement(), this->NumColumns(), this->GetColumnIncrement()); - } - - template - ConstColumnVectorReference ConstMatrixReference::GetDiagonal() const - { - auto size = std::min(this->NumColumns(), this->NumRows()); - return ConstColumnVectorReference(GetConstDataPointer(), size, this->GetIncrement() + 1); - } - - template - ConstColumnVectorReference ConstMatrixReference::ReferenceAsVector() const - { - DEBUG_THROW(!IsContiguous(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Can only flatten a matrix when its memory is contiguous")); - return 
ConstColumnVectorReference(GetConstDataPointer(), this->NumRows() * this->NumColumns(), 1); - } - - template - auto ConstMatrixReference::Transpose() const -> ConstMatrixReference::value> - { - return ConstMatrixReference::value>(GetConstDataPointer(), this->NumColumns(), this->NumRows(), this->GetIncrement()); - } - - template - const ElementType* ConstMatrixReference::GetMajorVectorBegin(size_t index) const - { - return GetConstDataPointer() + index * this->GetIncrement(); - } - - // - // MatrixReference - // - - template - MatrixReference::MatrixReference(ElementType* pData, size_t numRows, size_t numColumns, size_t increment) : - ConstMatrixReference(pData, numRows, numColumns, increment) - { - } - - template - MatrixReference::MatrixReference(ElementType* pData, size_t numRows, size_t numColumns) : - ConstMatrixReference(pData, numRows, numColumns) - { - } - - template - ElementType& MatrixReference::operator()(size_t rowIndex, size_t columnIndex) - { - using namespace std::string_literals; - DEBUG_THROW(rowIndex >= this->NumRows() || columnIndex >= this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "("s + std::to_string(rowIndex) + ", " + std::to_string(columnIndex) + ") exceeds matrix dimensions (" + std::to_string(this->NumRows()) + " x " + std::to_string(this->NumColumns()) + ".")); - - return GetDataPointer()[rowIndex * this->GetRowIncrement() + columnIndex * this->GetColumnIncrement()]; - } - - template - void MatrixReference::CopyFrom(ConstMatrixReference other) - { - if (this->NumRows() != other.NumRows() || this->NumColumns() != other.NumColumns()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Matrix dimensions are not the same."); - } - - for (size_t i = 0; i < other.GetMinorSize(); ++i) - { - GetMajorVector(i).CopyFrom(other.GetMajorVector(i)); - } - } - - template - void MatrixReference::CopyFrom(ConstMatrixReference::value> other) - { - if (this->NumRows() != other.NumRows() || this->NumColumns() != other.NumColumns()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Matrix dimensions are not the same."); - } - - for (size_t i = 0; i < other.NumRows(); ++i) - { - GetRow(i).CopyFrom(other.GetRow(i)); - } - } - - template - void MatrixReference::Swap(MatrixReference& other) - { - ConstMatrixReference::Swap(other); - } - - template - void MatrixReference::Fill(ElementType value) - { - for (size_t i = 0; i < this->GetMinorSize(); ++i) - { - auto vector = GetMajorVector(i); - vector.Fill(value); - } - } - - template - template - void MatrixReference::Generate(GeneratorType generator) - { - for (size_t i = 0; i < this->GetMinorSize(); ++i) - { - GetMajorVector(i).Generate(generator); - } - } - - template - template - void MatrixReference::Transform(TransformationType transformation) - { - for (size_t i = 0; i < this->GetMinorSize(); ++i) - { - TransformUpdate(transformation, GetMajorVector(i)); - } - } - - template - auto MatrixReference::Transpose() -> MatrixReference::value> - { - return MatrixReference::value>(GetDataPointer(), this->NumColumns(), this->NumRows(), this->GetIncrement()); - } - - template - MatrixReference MatrixReference::GetSubMatrix(size_t firstRow, size_t firstColumn, size_t numRows, size_t numColumns) - { - DEBUG_THROW(firstRow + numRows > this->NumRows() || firstColumn + numColumns > this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "block exceeds matrix dimensions.")); - - return 
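Transpose above moves no data: it returns a view with the row and column counts swapped and the layout flag flipped, so element (i, j) of the transpose aliases element (j, i) of the original. A sketch of a stride-swapping transpose view (illustrative struct, not the ELL type):

    #include <cstddef>

    struct MatrixView
    {
        const double* data;
        std::size_t numRows, numColumns;
        std::size_t rowInc, colInc; // strides into the flat buffer

        double operator()(std::size_t i, std::size_t j) const
        {
            return data[i * rowInc + j * colInc];
        }

        MatrixView Transpose() const
        {
            // swap extents and strides; no element is copied or moved
            return { data, numColumns, numRows, colInc, rowInc };
        }
    };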
MatrixReference(GetDataPointer() + firstRow * this->GetRowIncrement() + firstColumn * this->GetColumnIncrement(), numRows, numColumns, this->GetIncrement()); - } - - template - ColumnVectorReference MatrixReference::GetColumn(size_t index) - { - DEBUG_THROW(index >= this->NumColumns(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "column index exceeds matrix dimensions.")); - - return ColumnVectorReference(GetDataPointer() + index * this->GetColumnIncrement(), this->NumRows(), this->GetRowIncrement()); - } - - template - RowVectorReference MatrixReference::GetRow(size_t index) - { - DEBUG_THROW(index >= this->NumRows(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "row index exceeds matrix dimensions.")); - - return RowVectorReference(GetDataPointer() + index * this->GetRowIncrement(), this->NumColumns(), this->GetColumnIncrement()); - } - - template - ColumnVectorReference MatrixReference::GetDiagonal() - { - auto size = std::min(this->NumColumns(), this->NumRows()); - return ColumnVectorReference(GetDataPointer(), size, this->GetIncrement() + 1); - } - - template - ColumnVectorReference MatrixReference::ReferenceAsVector() - { - DEBUG_THROW(!IsContiguous(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Can only flatten a matrix when its memory is contiguous")); - return ColumnVectorReference(GetDataPointer(), this->NumRows() * this->NumColumns(), 1); - } - - // - // Matrix - // - - template - Matrix::Matrix(size_t numRows, size_t numColumns) : - MatrixReference(nullptr, numRows, numColumns), - _data(numRows * numColumns) - { - this->_pData = _data.data(); - } - - template - Matrix::Matrix(std::initializer_list> list) : - MatrixReference(nullptr, list.size(), list.begin()->size()), - _data(list.size() * list.begin()->size()) - { - this->_pData = _data.data(); - auto numColumns = list.begin()->size(); - DEBUG_USED(numColumns); - - size_t i = 0; - for (auto rowIter = list.begin(); rowIter < list.end(); ++rowIter) - { - DEBUG_THROW(rowIter->size() != numColumns, utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "incorrect number of elements in initializer list")); - - size_t j = 0; - for (auto elementIter = rowIter->begin(); elementIter < rowIter->end(); ++elementIter) - { - (*this)(i, j) = *elementIter; - ++j; - } - ++i; - } - } - - template - Matrix::Matrix(size_t numRows, size_t numColumns, const std::vector& data) : - MatrixReference(nullptr, numRows, numColumns), - _data(data) - { - this->_pData = _data.data(); - } - - template - Matrix::Matrix(size_t numRows, size_t numColumns, std::vector&& data) : - MatrixReference(nullptr, numRows, numColumns), - _data(std::move(data)) - { - this->_pData = _data.data(); - } - - template - Matrix::Matrix(Matrix&& other) : - MatrixReference(nullptr, other.NumRows(), other.NumColumns()), - _data(std::move(other._data)) - { - this->_pData = _data.data(); - } - - template - Matrix::Matrix(const Matrix& other) : - MatrixReference(nullptr, other.NumRows(), other.NumColumns()), - _data(other._data) - { - this->_pData = _data.data(); - } - - template - Matrix::Matrix(ConstMatrixReference& other) : - MatrixReference(nullptr, other.NumRows(), other.NumColumns()), - _data(other.NumRows() * other.NumColumns()) - { - this->_pData = _data.data(); - for (size_t i = 0; i < this->NumRows(); ++i) - { - for (size_t j = 0; j < this->NumColumns(); ++j) - { - (*this)(i, j) = other(i, j); - } - } - } - - template - Matrix::Matrix(ConstMatrixReference::value> 
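The nested initializer_list constructor above flattens { { ... }, { ... } } into the matrix's single buffer while checking that every row has the same length. A standalone sketch of that validation and flattening, assuming (as the original does) a non-empty list:

    #include <cstddef>
    #include <initializer_list>
    #include <stdexcept>
    #include <vector>

    std::vector<double> FlattenRowMajor(std::initializer_list<std::initializer_list<double>> rows)
    {
        const std::size_t numColumns = rows.begin()->size(); // assumes a non-empty list
        std::vector<double> data;
        data.reserve(rows.size() * numColumns);
        for (const auto& row : rows)
        {
            if (row.size() != numColumns)
            {
                throw std::invalid_argument("incorrect number of elements in initializer list");
            }
            data.insert(data.end(), row.begin(), row.end());
        }
        return data;
    }
    // FlattenRowMajor({ { 1, 2 }, { 3, 4 } }) -> { 1, 2, 3, 4 }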
other) : - MatrixReference(nullptr, other.NumRows(), other.NumColumns()), - _data(other.NumRows() * other.NumColumns()) - { - this->_pData = _data.data(); - for (size_t i = 0; i < this->NumRows(); ++i) - { - for (size_t j = 0; j < this->NumColumns(); ++j) - { - (*this)(i, j) = other(i, j); - } - } - } - - template - Matrix& Matrix::operator=(Matrix other) - { - Swap(other); - return *this; - } - - template - void Matrix::Swap(Matrix& other) - { - MatrixReference::Swap(other); - std::swap(_data, other._data); - } - - template - void MatrixArchiver::Write(const Matrix& matrix, const std::string& name, utilities::Archiver& archiver) - { - archiver[GetRowsName(name)] << matrix.NumRows(); - archiver[GetColumnsName(name)] << matrix.NumColumns(); - archiver[GetValuesName(name)] << matrix.ToArray(); - } - - template - void MatrixArchiver::Read(Matrix& matrix, const std::string& name, utilities::Unarchiver& archiver) - { - size_t rows = 0; - size_t columns = 0; - std::vector values; - - archiver[GetRowsName(name)] >> rows; - archiver[GetColumnsName(name)] >> columns; - archiver[GetValuesName(name)] >> values; - - Matrix value(rows, columns, std::move(values)); - - matrix = std::move(value); - } -} // namespace math -} // namespace ell diff --git a/libraries/math/tcc/MatrixOperations.tcc b/libraries/math/tcc/MatrixOperations.tcc deleted file mode 100644 index 012949c78..000000000 --- a/libraries/math/tcc/MatrixOperations.tcc +++ /dev/null @@ -1,723 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MatrixOperations.tcc (math) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include "../include/VectorOperations.h" - -#include -#include -#include - -namespace ell -{ -namespace math -{ - template - void Print(ConstMatrixReference M, std::ostream& stream, size_t indent, size_t maxRows, size_t maxElementsPerRow) - { - using namespace logging; - - stream << std::string(indent, ' ') << "{"; - if (M.NumRows() > 0) - { - Print(M.GetRow(0), stream, 1, maxElementsPerRow); - } - - if (M.NumRows() <= maxRows) - { - for (size_t i = 1; i < M.NumRows(); ++i) - { - stream << "," << EOL; - Print(M.GetRow(i), stream, indent + 2, maxElementsPerRow); - } - } - else - { - for (size_t i = 1; i < maxRows - 2; ++i) - { - stream << "," << EOL; - Print(M.GetRow(i), stream, indent + 2, maxElementsPerRow); - } - stream << "," << EOL - << std::string(indent + 2, ' ') << "...," << EOL; - Print(M.GetRow(M.NumRows() - 1), stream, indent + 2, maxElementsPerRow); - } - stream << " }" << EOL; - } - - template - std::ostream& operator<<(std::ostream& stream, ConstMatrixReference M) - { - Print(M, stream); - return stream; - } - - template > - void operator+=(MatrixReference matrix, ScalarElementType scalar) - { - AddUpdate(static_cast(scalar), matrix); - } - - template - void operator+=(MatrixReference matrixB, ConstMatrixReference matrixA) - { - AddUpdate(matrixA, matrixB); - } - - template > - void operator-=(MatrixReference matrix, ScalarElementType scalar) - { - AddUpdate(-static_cast(scalar), matrix); - } - - template - void operator-=(MatrixReference matrixB, ConstMatrixReference matrixA) - { - ScaleAddUpdate(static_cast(-1), matrixA, One(), matrixB); - } - - template > - void operator*=(MatrixReference matrix, ScalarElementType scalar) - { - ScaleUpdate(static_cast(scalar), matrix); - } - - template > - void 
operator/=(MatrixReference matrix, ScalarElementType scalar) - { - DEBUG_THROW(scalar == 0, utilities::NumericException(utilities::NumericExceptionErrors::divideByZero, "divide by zero")); - - ScaleUpdate(1 / static_cast(scalar), matrix); - } - - template - void AddUpdate(ElementType scalar, MatrixReference matrix) - { - if (scalar == 0) - { - return; - } - if (matrix.IsContiguous()) - { - Internal::VectorOperations::AddUpdate(scalar, matrix.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrix.GetMinorSize(); ++i) - { - Internal::VectorOperations::AddUpdate(scalar, matrix.GetMajorVector(i)); - } - } - } - - namespace Internal - { - template - void AddUpdateAsVectors(ConstMatrixReference matrixA, MatrixReference matrixB) - { - if (matrixA.IsContiguous() && matrixB.IsContiguous()) - { - Internal::VectorOperations::AddUpdate(matrixA.ReferenceAsVector(), matrixB.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrixA.GetMinorSize(); ++i) - { - Internal::VectorOperations::AddUpdate(matrixA.GetMajorVector(i), matrixB.GetMajorVector(i)); - } - } - } - - template - void AddUpdateAsVectors(ConstMatrixReference matrixA, MatrixReference::value> matrixB) - { - for (size_t i = 0; i < matrixA.NumRows(); ++i) - { - Internal::VectorOperations::AddUpdate(matrixA.GetRow(i), matrixB.GetRow(i)); - } - } - } // namespace Internal - - template - void AddUpdate(ConstMatrixReference matrixA, MatrixReference matrixB) - { - DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns(), "Incompatible matrix sizes."); - - Internal::AddUpdateAsVectors(matrixA, matrixB); - } - - namespace Internal - { - template - void AddSetAsVectors(ElementType scalar, ConstMatrixReference matrix, MatrixReference output) - { - if (matrix.IsContiguous() && output.IsContiguous()) - { - Internal::VectorOperations::AddSet(scalar, matrix.ReferenceAsVector(), output.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrix.GetMinorSize(); ++i) - { - Internal::VectorOperations::AddSet(scalar, matrix.GetMajorVector(i), output.GetMajorVector(i)); - } - } - } - - template - void AddSetAsVectors(ElementType scalar, ConstMatrixReference matrix, MatrixReference::value> output) - { - for (size_t i = 0; i < matrix.NumRows(); ++i) - { - Internal::VectorOperations::AddSet(scalar, matrix.GetRow(i), output.GetRow(i)); - } - } - - template - void AddSetAsVectors(ConstMatrixReference matrixA, ConstMatrixReference matrixB, MatrixReference output) - { - if (matrixA.IsContiguous() && matrixB.IsContiguous() && output.IsContiguous()) - { - Internal::VectorOperations::AddSet(matrixA.ReferenceAsVector(), matrixB.ReferenceAsVector(), output.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrixA.GetMinorSize(); ++i) - { - Internal::VectorOperations::AddSet(matrixA.GetMajorVector(i), matrixB.GetMajorVector(i), output.GetMajorVector(i)); - } - } - } - - template - void AddSetAsVectors(ConstMatrixReference matrixA, ConstMatrixReference matrixB, MatrixReference::value> output) - { - for (size_t i = 0; i < matrixA.NumRows(); ++i) - { - Internal::VectorOperations::AddSet(matrixA.GetRow(i), matrixB.GetRow(i), output.GetRow(i)); - } - } - - template - void AddSetAsVectors(ConstMatrixReference matrixA, ConstMatrixReference::value> matrixB, MatrixReference output) - { - for (size_t i = 0; i < matrixA.NumRows(); ++i) - { - Internal::VectorOperations::AddSet(matrixA.GetRow(i), matrixB.GetRow(i), output.GetRow(i)); - } - } - } // namespace Internal - - template - void 
AddSet(ElementType scalar, ConstMatrixReference matrix, MatrixReference output) - { - DEBUG_CHECK_SIZES(matrix.NumRows() != output.NumRows() || matrix.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); - - if (scalar == 0) - { - output.CopyFrom(matrix); - } - else - { - Internal::AddSetAsVectors(scalar, matrix, output); - } - } - - template - void AddSet(ConstMatrixReference matrixA, ConstMatrixReference matrixB, MatrixReference output) - { - DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns() || matrixA.NumRows() != output.NumRows() || matrixA.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); - - Internal::AddSetAsVectors(matrixA, matrixB, output); - } - - template - void ScaleUpdate(ElementType scalar, MatrixReference matrix) - { - if (scalar == 0) - { - matrix.Reset(); - } - else if (scalar == 1) - { - return; - } - else if (matrix.IsContiguous()) - { - Internal::VectorOperations::ScaleUpdate(scalar, matrix.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrix.GetMinorSize(); ++i) - { - Internal::VectorOperations::ScaleUpdate(scalar, matrix.GetMajorVector(i)); - } - } - } - - // implementations of ScaleSet using the equivalent vector operation - namespace Internal - { - template - void ScaleSetAsVectors(ElementType scalar, ConstMatrixReference matrix, MatrixReference output) - { - if (matrix.IsContiguous() && output.IsContiguous()) - { - Internal::VectorOperations::ScaleSet(scalar, matrix.ReferenceAsVector(), output.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrix.GetMinorSize(); ++i) - { - Internal::VectorOperations::ScaleSet(scalar, matrix.GetMajorVector(i), output.GetMajorVector(i)); - } - } - } - - template - void ScaleSetAsVectors(ElementType scalar, ConstMatrixReference matrix, MatrixReference::value> output) - { - for (size_t i = 0; i < matrix.NumRows(); ++i) - { - Internal::VectorOperations::ScaleSet(scalar, matrix.GetRow(i), output.GetRow(i)); - } - } - } // namespace Internal - - template - void ScaleSet(ElementType scalar, ConstMatrixReference matrix, MatrixReference output) - { - DEBUG_CHECK_SIZES(matrix.NumRows() != output.NumRows() || matrix.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); - - if (scalar == 0) - { - output.Reset(); - } - else if (scalar == 1) - { - output.CopyFrom(matrix); - } - else - { - Internal::ScaleSetAsVectors(scalar, matrix, output); - } - } - - // implementations of ScaleAddUpdate using the equivalent vector operation - namespace Internal - { - template - void ScaleAddUpdateAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, MatrixReference matrixB) - { - if (matrixA.IsContiguous() && matrixB.IsContiguous()) - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, matrixA.ReferenceAsVector(), scalarB, matrixB.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrixA.GetMinorSize(); ++i) - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, matrixA.GetMajorVector(i), scalarB, matrixB.GetMajorVector(i)); - } - } - } - - template - void ScaleAddUpdateAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, MatrixReference::value> matrixB) - { - for (size_t i = 0; i < matrixA.NumRows(); ++i) - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, matrixA.GetRow(i), scalarB, matrixB.GetRow(i)); - } - } - } // namespace Internal - - // matrixB += scalarA * matrixA - template - void ScaleAddUpdate(ElementType scalarA, ConstMatrixReference 
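AddUpdate, ScaleUpdate, ScaleSet, and their relatives above all share one dispatch shape: if the matrix occupies one contiguous block, reinterpret it as a single long vector (ReferenceAsVector) and make one pass; otherwise apply the vector kernel to each major-order row or column. A standalone sketch of that contiguity fast path:

    #include <cstddef>

    // majorSize: length of each stored row/column; minorSize: how many of them;
    // increment: distance between consecutive major vectors in the buffer.
    void ScaleMatrix(double scalar, double* data, std::size_t majorSize,
                     std::size_t minorSize, std::size_t increment)
    {
        if (increment == majorSize) // contiguous: treat as one long vector
        {
            for (std::size_t i = 0; i < majorSize * minorSize; ++i) data[i] *= scalar;
        }
        else // padded: one pass per major vector
        {
            for (std::size_t i = 0; i < minorSize; ++i)
            {
                double* v = data + i * increment;
                for (std::size_t j = 0; j < majorSize; ++j) v[j] *= scalar;
            }
        }
    }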
matrixA, One, MatrixReference matrixB) - { - DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns(), "Incompatible matrix sizes."); - - if (scalarA == 0) - { - return; - } - else if (scalarA == 1) - { - AddUpdate(matrixA, matrixB); - } - else - { - Internal::ScaleAddUpdateAsVectors(scalarA, matrixA, One(), matrixB); - } - } - - // matrixB = scalarA * ones + scalarB * matrixB - template - void ScaleAddUpdate(ElementType scalarA, OnesMatrix, ElementType scalarB, MatrixReference matrixB) - { - if (scalarA == 0) - { - ScaleUpdate(scalarB, matrixB); - } - else if (scalarB == 0) - { - matrixB.Fill(scalarA); - } - else if (scalarB == 1) - { - AddUpdate(scalarA, matrixB); - } - else if (matrixB.IsContiguous()) - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, OnesVector(), scalarB, matrixB.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrixB.GetMinorSize(); ++i) - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, OnesVector(), scalarB, matrixB.GetMajorVector(i)); - } - } - } - - // matrixB = matrixA + scalarB * matrixB - template - void ScaleAddUpdate(One, ConstMatrixReference matrixA, ElementType scalarB, MatrixReference matrixB) - { - DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns(), "Incompatible matrix sizes."); - - if (scalarB == 0) - { - matrixB.CopyFrom(matrixA); - } - else if (scalarB == 1) - { - AddUpdate(matrixA, matrixB); - } - else - { - Internal::ScaleAddUpdateAsVectors(One(), matrixA, scalarB, matrixB); - } - } - - // matrixB = scalarA * matrixA + scalarB * matrixB - template - void ScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, ElementType scalarB, MatrixReference matrixB) - { - DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns(), "Incompatible matrix sizes."); - - if (scalarA == 0) - { - ScaleUpdate(scalarB, matrixB); - } - else if (scalarA == 1) - { - ScaleAddUpdate(One(), matrixA, scalarB, matrixB); - } - else if (scalarB == 0) - { - Internal::ScaleSetAsVectors(scalarA, matrixA, matrixB); - } - else if (scalarB == 1) - { - Internal::ScaleAddUpdateAsVectors(scalarA, matrixA, One(), matrixB); - } - else - { - Internal::ScaleAddUpdateAsVectors(scalarA, matrixA, scalarB, matrixB); - } - } - - // implementations of ScaleAddSet using the equivalent vector operation - namespace Internal - { - template - void ScaleAddSetAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, ConstMatrixReference matrixB, MatrixReference output) - { - if (matrixA.IsContiguous() && matrixB.IsContiguous() && output.IsContiguous()) - { - Internal::VectorOperations::ScaleAddSet(scalarA, matrixA.ReferenceAsVector(), scalarB, matrixB.ReferenceAsVector(), output.ReferenceAsVector()); - } - else - { - for (size_t i = 0; i < matrixA.GetMinorSize(); ++i) - { - Internal::VectorOperations::ScaleAddSet(scalarA, matrixA.GetMajorVector(i), scalarB, matrixB.GetMajorVector(i), output.GetMajorVector(i)); - } - } - } - - template - void ScaleAddSetAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, ConstMatrixReference matrixB, MatrixReference::value> output) - { - for (size_t i = 0; i < matrixA.NumRows(); ++i) - { - Internal::VectorOperations::ScaleAddSet(scalarA, matrixA.GetRow(i), scalarB, matrixB.GetRow(i), output.GetRow(i)); - } - } - - template - void ScaleAddSetAsVectors(scalarAType scalarA, ConstMatrixReference matrixA, scalarBType scalarB, 
ConstMatrixReference::value> matrixB, MatrixReference output) - { - for (size_t i = 0; i < matrixA.NumRows(); ++i) - { - Internal::VectorOperations::ScaleAddSet(scalarA, matrixA.GetRow(i), scalarB, matrixB.GetRow(i), output.GetRow(i)); - } - } - } // namespace Internal - - // output = scalarA * matrixA + matrixB - template - void ScaleAddSet(ElementType scalarA, ConstMatrixReference matrixA, One, ConstMatrixReference matrixB, MatrixReference output) - { - DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns() || matrixA.NumRows() != output.NumRows() || matrixA.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); - - if (scalarA == 0) - { - output.CopyFrom(matrixB); - } - else if (scalarA == 1) - { - Internal::AddSetAsVectors(matrixA, matrixB, output); - } - else - { - Internal::ScaleAddSetAsVectors(scalarA, matrixA, One(), matrixB, output); - } - } - - //// output = matrixA + scalarB * matrixB - template - void ScaleAddSet(One, ConstMatrixReference matrixA, ElementType scalarB, ConstMatrixReference matrixB, MatrixReference output) - { - DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns() || matrixA.NumRows() != output.NumRows() || matrixA.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); - - if (scalarB == 0) - { - output.CopyFrom(matrixA); - } - else if (scalarB == 1) - { - Internal::AddSetAsVectors(matrixA, matrixB, output); - } - else - { - Internal::ScaleAddSetAsVectors(One(), matrixA, scalarB, matrixB, output); - } - } - - // output = scalarA * matrixA + scalarB * matrixB - template - void ScaleAddSet(ElementType scalarA, ConstMatrixReference matrixA, ElementType scalarB, ConstMatrixReference matrixB, MatrixReference output) - { - DEBUG_CHECK_SIZES(matrixA.NumRows() != matrixB.NumRows() || matrixA.NumColumns() != matrixB.NumColumns() || matrixA.NumRows() != output.NumRows() || matrixA.NumColumns() != output.NumColumns(), "Incompatible matrix sizes."); - - if (scalarA == 0) - { - ScaleSet(scalarB, matrixB, output); - } - else if (scalarA == 1) - { - ScaleAddSet(One(), matrixA, scalarB, matrixB, output); - } - else if (scalarB == 0) - { - Internal::ScaleSetAsVectors(scalarA, matrixA, output); - } - else if (scalarB == 1) - { - Internal::ScaleAddSetAsVectors(scalarA, matrixA, One(), matrixB, output); - } - else - { - Internal::ScaleAddSetAsVectors(scalarA, matrixA, scalarB, matrixB, output); - } - } - - template - void RowwiseSum(ConstMatrixReference matrix, ColumnVectorReference vector) - { - DEBUG_CHECK_SIZES(vector.Size() != matrix.NumRows(), "Incompatible matrix vector sizes."); - - math::ColumnVector ones(matrix.NumColumns()); - ones.Fill(1); - - MultiplyScaleAddUpdate(static_cast(1), matrix, ones, static_cast(0), vector); - } - - template - void ColumnwiseSum(ConstMatrixReference matrix, RowVectorReference vector) - { - DEBUG_CHECK_SIZES(vector.Size() != matrix.NumColumns(), "Incompatible matrix vector sizes."); - - math::RowVector ones(matrix.NumRows()); - ones.Fill(1); - - MultiplyScaleAddUpdate(static_cast(1), ones, matrix, static_cast(0), vector); - } - - template - void RankOneUpdate(ElementType scalar, ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) - { - DEBUG_CHECK_SIZES(vectorA.Size() != matrix.NumRows() || vectorB.Size() != matrix.NumColumns(), "Incompatible matrix vector sizes."); - Internal::MatrixOperations::RankOneUpdate(scalar, vectorA, vectorB, matrix); - } - - template - void 
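RowwiseSum and ColumnwiseSum above reuse the matrix-vector multiply kernel rather than a bespoke reduction: summing the rows of M is exactly M times a vector of ones, with scalars (1, 0) passed to MultiplyScaleAddUpdate. A plain-loop equivalent for a contiguous row-major matrix:

    #include <cstddef>
    #include <vector>

    std::vector<double> RowwiseSum(const std::vector<double>& m,
                                   std::size_t numRows, std::size_t numColumns)
    {
        std::vector<double> result(numRows, 0.0);
        for (std::size_t i = 0; i < numRows; ++i)
        {
            for (std::size_t j = 0; j < numColumns; ++j)
            {
                result[i] += m[i * numColumns + j]; // Dot(row i, ones)
            }
        }
        return result;
    }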
MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrix, ConstColumnVectorReference vectorA, ElementType scalarB, ColumnVectorReference vectorB) - { - DEBUG_CHECK_SIZES(matrix.NumColumns() != vectorA.Size() || matrix.NumRows() != vectorB.Size(), "Incompatible matrix vector sizes."); - - Internal::MatrixOperations::MultiplyScaleAddUpdate(scalarA, matrix, vectorA, scalarB, vectorB); - } - - template - void MultiplyScaleAddUpdate(ElementType scalarA, ConstRowVectorReference vectorA, ConstMatrixReference matrix, ElementType scalarB, RowVectorReference vectorB) - { - DEBUG_CHECK_SIZES(matrix.NumRows() != vectorA.Size() || matrix.NumColumns() != vectorB.Size(), "Incompatible matrix vector sizes."); - - Internal::MatrixOperations::MultiplyScaleAddUpdate(scalarA, vectorA, matrix, scalarB, vectorB); - } - - template - void MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, ConstMatrixReference matrixB, ElementType scalarC, MatrixReference matrixC) - { - DEBUG_CHECK_SIZES(matrixA.NumColumns() != matrixB.NumRows() || matrixA.NumRows() != matrixC.NumRows() || matrixB.NumColumns() != matrixC.NumColumns(), "Incompatible matrix sizes."); - - Internal::MatrixOperations::MultiplyScaleAddUpdate(scalarA, matrixA, matrixB, scalarC, matrixC); - } - - template - void ElementwiseMultiplySet(ConstMatrixReference matrixA, ConstMatrixReference matrixB, MatrixReference matrixC) - { - for (size_t i = 0; i < matrixA.NumRows(); ++i) - { - ElementwiseMultiplySet(matrixA.GetRow(i), matrixB.GetRow(i), matrixC.GetRow(i)); - } - } - - template - void RowwiseCumulativeSumUpdate(MatrixReference matrix) - { - for (size_t i = 0; i < matrix.NumRows(); ++i) - { - CumulativeSumUpdate(matrix.GetRow(i)); - } - } - - template - void ColumnwiseCumulativeSumUpdate(MatrixReference matrix) - { - for (size_t i = 0; i < matrix.NumColumns(); ++i) - { - CumulativeSumUpdate(matrix.GetColumn(i)); - } - } - - template - void RowwiseConsecutiveDifferenceUpdate(MatrixReference matrix) - { - for (size_t i = 0; i < matrix.NumRows(); ++i) - { - ConsecutiveDifferenceUpdate(matrix.GetRow(i)); - } - } - - template - void ColumnwiseConsecutiveDifferenceUpdate(MatrixReference matrix) - { - for (size_t i = 0; i < matrix.NumColumns(); ++i) - { - ConsecutiveDifferenceUpdate(matrix.GetColumn(i)); - } - } - - // - // Native implementations of operations - // - - namespace Internal - { - template - void MatrixOperations::RankOneUpdate(ElementType scalar, ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) - { - for (size_t i = 0; i < matrix.NumRows(); ++i) - { - for (size_t j = 0; j < matrix.NumColumns(); ++j) - { - matrix(i, j) += scalar * vectorA[i] * vectorB[j]; - } - } - } - - template - void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrix, ConstColumnVectorReference vectorA, ElementType scalarB, ColumnVectorReference vectorB) - { - for (size_t i = 0; i < matrix.NumRows(); ++i) - { - auto row = matrix.GetRow(i); - vectorB[i] = scalarA * Dot(row, vectorA) + scalarB * vectorB[i]; - } - } - - template - void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstRowVectorReference vectorA, ConstMatrixReference matrix, ElementType scalarB, RowVectorReference vectorB) - { - MultiplyScaleAddUpdate(scalarA, matrix.Transpose(), vectorA.Transpose(), scalarB, vectorB.Transpose()); - } - - template - void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, ConstMatrixReference matrixB, 
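The native MultiplyScaleAddUpdate kernels here compute their results one Dot at a time over row and column views; the matrix-matrix version (continued just below) is the classic triple loop. The same computation for contiguous row-major storage, as a standalone sketch:

    #include <cstddef>
    #include <vector>

    // C = a * A * B + b * C for an m x k matrix A, k x n matrix B, m x n matrix C.
    void Gemm(double a, const std::vector<double>& A, const std::vector<double>& B,
              double b, std::vector<double>& C,
              std::size_t m, std::size_t k, std::size_t n)
    {
        for (std::size_t i = 0; i < m; ++i)
        {
            for (std::size_t j = 0; j < n; ++j)
            {
                double dot = 0;
                for (std::size_t p = 0; p < k; ++p)
                {
                    dot += A[i * k + p] * B[p * n + j]; // Dot(row i of A, column j of B)
                }
                C[i * n + j] = a * dot + b * C[i * n + j];
            }
        }
    }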
ElementType scalarB, MatrixReference matrixC) - { - for (size_t i = 0; i < matrixA.NumRows(); ++i) - { - for (size_t j = 0; j < matrixB.NumColumns(); ++j) - { - auto row = matrixA.GetRow(i); - auto column = matrixB.GetColumn(j); - matrixC(i, j) = scalarA * Dot(row, column) + scalarB * matrixC(i, j); - } - } - } - -#if defined(USE_BLAS) - // - // OpenBLAS implementations of operations - // - - template - void MatrixOperations::RankOneUpdate(ElementType scalar, ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) - { - Blas::Ger(matrix.GetLayout(), static_cast(matrix.NumRows()), static_cast(matrix.NumColumns()), scalar, vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetConstDataPointer(), static_cast(vectorB.GetIncrement()), matrix.GetDataPointer(), static_cast(matrix.GetIncrement())); - } - - template - void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrix, ConstColumnVectorReference vectorA, ElementType scalarB, ColumnVectorReference vectorB) - { - Blas::Gemv(matrix.GetLayout(), MatrixTranspose::noTranspose, static_cast(matrix.NumRows()), static_cast(matrix.NumColumns()), scalarA, matrix.GetConstDataPointer(), static_cast(matrix.GetIncrement()), vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), scalarB, vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); - } - - template - void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstRowVectorReference vectorA, ConstMatrixReference matrix, ElementType scalarB, RowVectorReference vectorB) - { - MultiplyScaleAddUpdate(scalarA, matrix.Transpose(), vectorA.Transpose(), scalarB, vectorB.Transpose()); - } - - template - void MatrixOperations::MultiplyScaleAddUpdate(ElementType scalarA, ConstMatrixReference matrixA, ConstMatrixReference matrixB, ElementType scalarB, MatrixReference matrixC) - { - MatrixLayout order = matrixC.GetLayout(); - MatrixTranspose transposeA = matrixA.GetLayout() == order ? MatrixTranspose::noTranspose : MatrixTranspose::transpose; - MatrixTranspose transposeB = matrixB.GetLayout() == order ? 
MatrixTranspose::noTranspose : MatrixTranspose::transpose; - - Blas::Gemm(order, transposeA, transposeB, static_cast(matrixA.NumRows()), static_cast(matrixB.NumColumns()), static_cast(matrixA.NumColumns()), scalarA, matrixA.GetConstDataPointer(), static_cast(matrixA.GetIncrement()), matrixB.GetConstDataPointer(), static_cast(matrixB.GetIncrement()), scalarB, matrixC.GetDataPointer(), static_cast(matrixC.GetIncrement())); - } -#endif - } // namespace Internal -} // namespace math -} // namespace ell diff --git a/libraries/math/tcc/Tensor.tcc b/libraries/math/tcc/Tensor.tcc deleted file mode 100644 index 1e43a06f2..000000000 --- a/libraries/math/tcc/Tensor.tcc +++ /dev/null @@ -1,806 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Tensor.tcc (math) -// Authors: Ofer Dekel, Kern Handa -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace math -{ - // - // TensorMatrixSlicers - // - - template - struct TensorMatrixSlicer; - - template - struct TensorMatrixSlicer - { - using SliceType = ColumnMatrixReference; - using ConstSliceType = ConstColumnMatrixReference; - - inline static size_t NumSlices(TensorShape shape) - { - return shape.GetValue(); - } - - static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) - { - DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); - - return ConstSliceType(pData + index * increment2, shape.GetValue(), shape.GetValue(), increment1); - } - - static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) - { - DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); - - return SliceType(pData + index * increment2, shape.GetValue(), shape.GetValue(), increment1); - } - }; - - template - struct TensorMatrixSlicer - { - using SliceType = ColumnMatrixReference; - using ConstSliceType = ConstColumnMatrixReference; - - inline static size_t NumSlices(TensorShape shape) - { - return shape.GetValue(); - } - - static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) - { - DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); - - return ConstSliceType(pData + index * increment1, shape.GetValue(), shape.GetValue(), increment2); - } - - static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) - { - DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); - - return SliceType(pData + index * increment1, shape.GetValue(), shape.GetValue(), increment2); - } - }; - - template - struct TensorMatrixSlicer - { - using SliceType = RowMatrixReference; - using ConstSliceType = ConstRowMatrixReference; - - inline static size_t NumSlices(TensorShape shape) - { - return shape.GetValue(); - } - - static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) - { - 
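// Editorial note (not part of the deleted file): the slicer above builds a 2-D
// matrix view over a 3-D tensor with plain pointer arithmetic -- offset the base
// pointer by index * increment2, then reuse increment1 as the matrix stride.
// A minimal self-contained sketch of the same idea, assuming dimension 0 is the
// contiguous one; the names here are illustrative, not ELL's API.
#include <cstddef>
#include <iostream>
#include <vector>

struct SliceView
{
    const double* data; // first element of the slice
    size_t rows;        // extent of dimension 0
    size_t cols;        // extent of dimension 1
    size_t stride;      // distance between consecutive columns (increment1)

    double operator()(size_t r, size_t c) const { return data[c * stride + r]; }
};

// View slice `index` of a (size0 x size1 x size2) tensor stored fully packed,
// i.e. increment1 = size0 and increment2 = size0 * size1.
SliceView GetPrimarySlice(const std::vector<double>& tensor, size_t size0, size_t size1, size_t index)
{
    const size_t increment1 = size0;
    const size_t increment2 = size0 * size1;
    return SliceView{ tensor.data() + index * increment2, size0, size1, increment1 };
}

int main()
{
    std::vector<double> t(2 * 3 * 4);
    for (size_t i = 0; i < t.size(); ++i) t[i] = static_cast<double>(i);
    SliceView s = GetPrimarySlice(t, 2, 3, 1); // second primary slice starts at flat offset 6
    std::cout << s(0, 0) << " " << s(1, 2) << "\n"; // prints 6 and 11
}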
DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); - - return ConstSliceType(pData + index * increment2, shape.GetValue(), shape.GetValue(), increment1); - } - - static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) - { - DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); - - return SliceType(pData + index * increment2, shape.GetValue(), shape.GetValue(), increment1); - } - }; - - template - struct TensorMatrixSlicer - { - using SliceType = RowMatrixReference; - using ConstSliceType = ConstRowMatrixReference; - - inline static size_t NumSlices(TensorShape shape) - { - return shape.GetValue(); - } - - static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) - { - DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); - - return ConstSliceType(pData + index * increment1, shape.GetValue(), shape.GetValue(), increment2); - } - - static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index) - { - DEBUG_THROW(index >= NumSlices(shape), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds tensor dimensions.")); - - return SliceType(pData + index * increment1, shape.GetValue(), shape.GetValue(), increment2); - } - }; - - // - // TensorVectorSlicers - // - - template - struct TensorVectorSlicer; - - template - struct TensorVectorSlicer - { - using SliceType = ColumnVectorReference; - using ConstSliceType = ConstColumnVectorReference; - - static inline size_t NumSlices(TensorShape shape) - { - return shape.GetValue() * shape.GetValue(); - } - - static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) - { - constexpr bool shouldSwap = dimension1 > dimension2; - if /*constexpr*/ (shouldSwap) - { - std::swap(index1, index2); - } - - DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); - DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); - - return ConstSliceType(pData + index1 * increment1 + index2 * increment2, shape.GetValue(), 1); - } - - static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) - { - constexpr bool shouldSwap = dimension1 > dimension2; - if /*constexpr*/ (shouldSwap) - { - std::swap(index1, index2); - } - - DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); - DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); - - return SliceType(pData + index1 * increment1 + index2 * increment2, shape.GetValue(), 1); - } - }; - - template - struct TensorVectorSlicer - { - using SliceType = ColumnVectorReference; - using ConstSliceType = ConstColumnVectorReference; - - static inline size_t 
NumSlices(TensorShape shape) - { - return shape.GetValue() * shape.GetValue(); - } - - static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) - { - constexpr bool shouldSwap = dimension0 > dimension2; - if /*constexpr*/ (shouldSwap) - { - std::swap(index1, index2); - } - - DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); - DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); - - return ConstSliceType(pData + index1 + index2 * increment2, shape.GetValue(), increment1); - } - - static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) - { - constexpr bool shouldSwap = dimension0 > dimension2; - if /*constexpr*/ (shouldSwap) - { - std::swap(index1, index2); - } - - DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); - DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); - - return SliceType(pData + index1 + index2 * increment2, shape.GetValue(), increment1); - } - }; - - template - struct TensorVectorSlicer - { - using SliceType = ColumnVectorReference; - using ConstSliceType = ConstColumnVectorReference; - - static inline size_t NumSlices(TensorShape shape) - { - return shape.GetValue() * shape.GetValue(); - } - - static ConstSliceType GetConstSlice(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) - { - constexpr bool shouldSwap = dimension0 > dimension1; - if /*constexpr*/ (shouldSwap) - { - std::swap(index1, index2); - } - - DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); - DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); - - return ConstSliceType(pData + index1 + index2 * increment1, shape.GetValue(), increment2); - } - - static SliceType GetSlice(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2, size_t index1, size_t index2) - { - constexpr bool shouldSwap = dimension0 > dimension1; - if /*constexpr*/ (shouldSwap) - { - std::swap(index1, index2); - } - - DEBUG_THROW(index1 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index1 exceeds tensor dimensions.")); - DEBUG_THROW(index2 >= shape.GetValue(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index2 exceeds tensor dimensions.")); - - return SliceType(pData + index1 + index2 * increment1, shape.GetValue(), increment2); - } - }; - - // - // ConstTensorReference - // - - template - ConstTensorReference::ConstTensorReference(TensorShape shape) : - ConstTensorReference(nullptr, shape) - { - } - - template - ConstTensorReference::ConstTensorReference(const ElementType* pData, TensorShape shape) : - _pData(pData), - _shape(shape) - { - _increment1 = shape.GetValue(); - _increment2 = _increment1 * shape.GetValue(); - } - - template - template - size_t ConstTensorReference::GetSize() const - { - 
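// Editorial note (not part of the deleted file): the reference type above keeps
// only a pointer, a shape, and two increments, so every element access reduces to
// the flat offset i0 + i1 * increment1 + i2 * increment2. A minimal sketch of that
// mapping, under the assumption of a fully packed tensor (increment1 = size0,
// increment2 = size0 * size1); identifiers are illustrative, not ELL's API.
#include <cassert>
#include <cstddef>

struct TensorLayout
{
    size_t size0, size1, size2;
    size_t increment1, increment2;

    TensorLayout(size_t s0, size_t s1, size_t s2) :
        size0(s0), size1(s1), size2(s2), increment1(s0), increment2(s0 * s1) {}

    size_t Offset(size_t i0, size_t i1, size_t i2) const
    {
        assert(i0 < size0 && i1 < size1 && i2 < size2); // mirrors the DEBUG_THROW range checks
        return i0 + i1 * increment1 + i2 * increment2;
    }
};

int main()
{
    TensorLayout layout(2, 3, 4);
    // Stepping dimension 0 moves by 1, dimension 1 by 2, dimension 2 by 6.
    assert(layout.Offset(1, 0, 0) == 1);
    assert(layout.Offset(0, 1, 0) == 2);
    assert(layout.Offset(0, 0, 1) == 6);
    assert(layout.Offset(1, 2, 3) == 1 + 2 * 2 + 3 * 6); // 23, the last element
}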
constexpr auto dimension = std::get(std::make_tuple(dimension0, dimension1, dimension2)); - return _shape.GetValue(); - } - - template - template - size_t ConstTensorReference::NumSlices() const - { - return TensorMatrixSlicer::NumSlices(_shape); - } - - template - template - size_t ConstTensorReference::NumSlices() const - { - return TensorVectorSlicer::NumSlices(_shape); - } - - template - size_t ConstTensorReference::NumPrimarySlices() const - { - return GetSize2(); - } - - template - ElementType ConstTensorReference::operator()(size_t row, size_t column, size_t channel) const - { - return operator()({ row, column, channel }); - } - - template - ElementType ConstTensorReference::operator()(TensorCoordinate coordinate) const - { - return GetConstDataPointer()[this->GetOffset(coordinate)]; - } - - template - std::vector ConstTensorReference::ToArray() const - { - if (!IsContiguous()) - { - auto resultIncrement0 = _shape.GetValue(); - auto resultIncrement1 = resultIncrement0 * _shape.GetValue(); - - std::vector result(NumRows() * NumColumns() * NumChannels()); - for (size_t i = 0; i < NumRows(); ++i) - { - for (size_t j = 0; j < NumColumns(); ++j) - { - for (size_t k = 0; k < NumChannels(); ++k) - { - auto value = (*this)(i, j, k); - auto coordinate = TensorCoordinate(i, j, k); - auto resultIndex = coordinate.GetValue() + coordinate.GetValue() * resultIncrement0 + coordinate.GetValue() * resultIncrement1; - result[resultIndex] = value; - } - } - } - return result; - } - return { GetConstDataPointer(), GetConstDataPointer() + Size() }; - } - - template - void ConstTensorReference::Swap(ConstTensorReference& other) - { - std::swap(_pData, other._pData); - std::swap(_shape, other._shape); - std::swap(_increment1, other._increment1); - std::swap(_increment2, other._increment2); - } - - template - bool ConstTensorReference::IsContiguous() const - { - return GetSize0() == GetIncrement1() && GetSize0() * GetSize1() == GetIncrement2(); - } - - template - template - bool ConstTensorReference::IsEqual(ConstTensorReference other, ElementType tolerance) const - { - if (NumRows() != other.NumRows() || NumColumns() != other.NumColumns() || NumChannels() != other.NumChannels()) - { - return false; - } - - for (size_t i = 0; i < NumRows(); ++i) - { - for (size_t j = 0; j < NumColumns(); ++j) - { - for (size_t k = 0; k < NumChannels(); ++k) - { - auto diff = (*this)(i, j, k) - other(i, j, k); - if (diff > tolerance || -diff > tolerance) - { - return false; - } - } - } - } - return true; - } - - template - template - bool ConstTensorReference::operator==(const ConstTensorReference& other) const - { - return IsEqual(other); - } - - template - template - bool ConstTensorReference::operator!=(const ConstTensorReference& other) const - { - return !IsEqual(other); - } - - template - ConstTensorReference ConstTensorReference::GetSubTensor(size_t firstRow, size_t firstColumn, size_t firstChannel, size_t numRows, size_t numColumns, size_t numChannels) const - { - return GetSubTensor({ firstRow, firstColumn, firstChannel }, { numRows, numColumns, numChannels }); - } - - template - ConstTensorReference ConstTensorReference::GetSubTensor(TensorCoordinate firstCoordinate, TensorShape shape) const - { - DEBUG_THROW(firstCoordinate.GetRowIndex() + shape.NumRows() > NumRows() || firstCoordinate.GetColumnIndex() + shape.NumColumns() > NumColumns() || firstCoordinate.GetChannelIndex() + shape.NumChannels() > NumChannels(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "subtensor exceeds tensor 
dimensions.")); - - return ConstTensorReference(GetConstDataPointer() + GetOffset(firstCoordinate), shape, GetIncrement1(), GetIncrement2()); - } - - template - template - auto ConstTensorReference::GetSlice(size_t index) const -> typename TensorMatrixSlicer::ConstSliceType - { - return TensorMatrixSlicer::GetConstSlice(GetConstDataPointer(), GetShape(), GetIncrement1(), GetIncrement2(), index); - } - - template - template - auto ConstTensorReference::GetSlice(size_t index1, size_t index2) const -> typename TensorVectorSlicer::ConstSliceType - { - return TensorVectorSlicer::GetConstSlice(GetConstDataPointer(), GetShape(), GetIncrement1(), GetIncrement2(), index1, index2); - } - - template // pData -> GetDataPointer - auto ConstTensorReference::GetPrimarySlice(size_t index) const -> typename TensorMatrixSlicer::ConstSliceType - { - return TensorMatrixSlicer::GetConstSlice(GetConstDataPointer(), GetShape(), GetIncrement1(), GetIncrement2(), index); - } - - template - ConstRowVectorReference ConstTensorReference::ReferenceAsVector() const - { - DEBUG_THROW(GetSize0() != GetIncrement1() || GetSize0() * GetSize1() != GetIncrement2(), utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Can only flatten a tensor when alll the dimensions are full")); - - return ConstRowVectorReference(GetConstDataPointer(), Size(), 1); - } - - template - ConstRowMatrixReference ConstTensorReference::ReferenceAsMatrix() const - { - DEBUG_THROW(GetSize0() != GetIncrement1(), utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Can only flatten a tensor when the first dimension is full")); - - return ConstRowMatrixReference(GetConstDataPointer(), GetSize2(), GetSize0() * GetSize1(), GetIncrement2()); - } - - template - size_t ConstTensorReference::GetOffset(TensorCoordinate coordinate) const - { - DEBUG_THROW(coordinate.GetRowIndex() >= NumRows() || coordinate.GetColumnIndex() >= NumColumns() || coordinate.GetChannelIndex() >= NumChannels(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, std::string("index exceeds tensor size in ConstTensorReference::GetOffset().") + " Tensor size: (" + std::to_string(NumRows()) + " x " + std::to_string(NumColumns()) + " x " + std::to_string(NumChannels()) + "), " - " index: (" + - std::to_string(coordinate.GetRowIndex()) + ", " + std::to_string(coordinate.GetColumnIndex()) + ", " + std::to_string(coordinate.GetChannelIndex()) + ")")); - - return coordinate.GetValue() + coordinate.GetValue() * GetIncrement1() + coordinate.GetValue() * GetIncrement2(); - } - - template - ConstTensorReference::ConstTensorReference(const ElementType* pData, TensorShape shape, size_t increment1, size_t increment2) : - _pData(pData), - _shape(shape), - _increment1(increment1), - _increment2(increment2) - {} - - template - size_t NumSlices(ConstTensorReference tensor) - { - return tensor.template NumSlices(); - } - - template - size_t NumSlices(ConstTensorReference tensor) - { - return tensor.template NumSlices(); - } - - template - auto GetSlice(ConstTensorReference tensor, size_t index) - { - return tensor.template GetSlice(index); - } - - template - auto GetSlice(ConstTensorReference tensor, size_t index1, size_t index2) - { - return tensor.template GetSlice(index1, index2); - } - - // - // TensorReference - // - - template - TensorReference::TensorReference(TensorShape shape) : - ConstTensorRef(shape) - {} - - template - TensorReference::TensorReference(ElementType* pData, size_t numRows, size_t numColumns, size_t numChannels) : - 
ConstTensorRef(pData, TensorShape{ numRows, numColumns, numChannels }) - {} - - template - ElementType& TensorReference::operator()(size_t row, size_t column, size_t channel) - { - return operator()({ row, column, channel }); - } - - template - ElementType& TensorReference::operator()(TensorCoordinate coordinate) - { - return GetDataPointer()[this->GetOffset(coordinate)]; - } - - template - void TensorReference::Swap(TensorReference& other) - { - ConstTensorRef::Swap(other); - } - - template - void TensorReference::CopyFrom(ConstTensorReference other) - { - DEBUG_CHECK_SIZES(this->NumRows() != other.NumRows(), "Tensors must have the same number of rows"); - DEBUG_CHECK_SIZES(this->NumColumns() != other.NumColumns(), "Tensors must have the same number of columns"); - DEBUG_CHECK_SIZES(this->NumChannels() != other.NumChannels(), "Tensors must have the same number of channels"); - - for (size_t i = 0; i < this->NumPrimarySlices(); ++i) - { - auto slice = other.GetPrimarySlice(i); - GetPrimarySlice(i).CopyFrom(slice); - } - } - - template - void TensorReference::CopyFrom(ConstTensorReference other) - { - DEBUG_CHECK_SIZES(this->NumRows() != other.NumRows(), "Tensors must have the same number of rows"); - DEBUG_CHECK_SIZES(this->NumColumns() != other.NumColumns(), "Tensors must have the same number of columns"); - DEBUG_CHECK_SIZES(this->NumChannels() != other.NumChannels(), "Tensors must have the same number of channels"); - - for (size_t i = 0; i < NumSlices(*this); ++i) - { - this->template GetSlice(i).CopyFrom(GetSlice(other, i)); - } - } - - template - template - void TensorReference::CopyFrom(ConstTensorReference other) - { - DEBUG_CHECK_SIZES(this->NumRows() != other.NumRows(), "Tensors must have the same number of rows"); - DEBUG_CHECK_SIZES(this->NumColumns() != other.NumColumns(), "Tensors must have the same number of columns"); - DEBUG_CHECK_SIZES(this->NumChannels() != other.NumChannels(), "Tensors must have the same number of channels"); - - for (size_t i = 0; i < math::NumSlices(*this); ++i) - { - auto thisSlice = this->template GetSlice(i); - auto otherSlice = other.template GetSlice(i); - thisSlice.CopyFrom(otherSlice); - } - } - - template - void TensorReference::Fill(ElementType value) - { - for (size_t i = 0; i < this->NumPrimarySlices(); ++i) - { - auto slice = GetPrimarySlice(i); - slice.Fill(value); - } - } - - template - template - void TensorReference::Generate(GeneratorType generator) - { - for (size_t i = 0; i < this->NumPrimarySlices(); ++i) - { - auto slice = GetPrimarySlice(i); - slice.Generate(generator); - } - } - - template - template - void TensorReference::Transform(TransformationType transformation) - { - for (size_t i = 0; i < this->NumPrimarySlices(); ++i) - { - auto slice = GetPrimarySlice(i); - slice.Transform(transformation); - } - } - - template - TensorReference TensorReference::GetSubTensor(size_t firstRow, size_t firstColumn, size_t firstChannel, size_t numRows, size_t numColumns, size_t numChannels) - { - return GetSubTensor({ firstRow, firstColumn, firstChannel }, { numRows, numColumns, numChannels }); - } - - template - TensorReference TensorReference::GetSubTensor(TensorCoordinate firstCoordinate, TensorShape shape) - { - DEBUG_THROW(firstCoordinate.GetRowIndex() + shape.NumRows() > this->NumRows() || firstCoordinate.GetColumnIndex() + shape.NumColumns() > this->NumColumns() || firstCoordinate.GetChannelIndex() + shape.NumChannels() > this->NumChannels(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "subtensor 
exceeds tensor dimensions.")); - - return TensorReference(GetDataPointer() + this->GetOffset(firstCoordinate), shape, this->GetIncrement1(), this->GetIncrement2()); - } - - template - template - auto TensorReference::GetSlice(size_t index) -> typename TensorMatrixSlicer::SliceType - { - return TensorMatrixSlicer::GetSlice(GetDataPointer(), this->GetShape(), this->GetIncrement1(), this->GetIncrement2(), index); - } - - template - template - auto TensorReference::GetSlice(size_t index1, size_t index2) -> typename TensorVectorSlicer::SliceType - { - return TensorVectorSlicer::GetSlice(GetDataPointer(), this->GetShape(), this->GetIncrement1(), this->GetIncrement2(), index1, index2); - } - - template - auto TensorReference::GetPrimarySlice(size_t index) -> typename TensorMatrixSlicer::SliceType - { - return TensorMatrixSlicer::GetSlice(GetDataPointer(), this->GetShape(), this->GetIncrement1(), this->GetIncrement2(), index); - } - - template - RowVectorReference TensorReference::ReferenceAsVector() - { - DEBUG_THROW(this->GetSize0() != this->GetIncrement1() || this->GetSize0() * this->GetSize1() != this->GetIncrement2(), utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Can only flatten a tensor to vector when alll the dimensions are full")); - - return RowVectorReference(GetDataPointer(), this->Size(), 1); - } - - template - RowMatrixReference TensorReference::ReferenceAsMatrix() - { - DEBUG_THROW(this->GetSize0() != this->GetIncrement1(), utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Can only flatten a tensor when the first dimension is full")); - - return RowMatrixReference(GetDataPointer(), this->GetSize2(), this->GetSize0() * this->GetSize1(), this->GetIncrement2()); - } - - template - TensorReference::TensorReference(ElementType* pData, TensorShape shape, size_t increment1, size_t increment2) : - ConstTensorRef(pData, shape, increment1, increment2) - {} - - template - size_t NumSlices(TensorReference tensor) - { - return tensor.template NumSlices(); - } - - template - size_t NumSlices(TensorReference tensor) - { - return tensor.template NumSlices(); - } - - template - auto GetSlice(TensorReference tensor, size_t index) - { - return tensor.template GetSlice(index); - } - - template - auto GetSlice(TensorReference tensor, size_t index1, size_t index2) - { - return tensor.template GetSlice(index1, index2); - } - - // - // Tensor - // - - template - Tensor::Tensor() : - Tensor(TensorShape{ 0, 0, 0 }) - { - } - - template - Tensor::Tensor(size_t numRows, size_t numColumns, size_t numChannels) : - TensorRef(TensorShape(numRows, numColumns, numChannels)), - _data(numRows * numColumns * numChannels) - { - this->_pData = _data.data(); - } - - template - Tensor::Tensor(size_t numRows, size_t numColumns, size_t numChannels, const std::vector& data) : - TensorRef(TensorShape{ numRows, numColumns, numChannels }), - _data(data) - { - this->_pData = _data.data(); - } - - template - Tensor::Tensor(size_t numRows, size_t numColumns, size_t numChannels, std::vector&& data) : - TensorRef(TensorShape{ numRows, numColumns, numChannels }), - _data(std::move(data)) - { - this->_pData = _data.data(); - } - - template - Tensor::Tensor(TensorShape shape) : - TensorRef(shape), - _data(shape.Size()) - { - this->_pData = _data.data(); - } - - template - Tensor::Tensor(const Tensor& other) : - TensorRef(other), - _data(other._data) - { - this->_pData = _data.data(); - } - - template - template - Tensor::Tensor(ConstTensorReference other) : - TensorRef(TensorShape{ 
other.NumRows(), other.NumColumns(), other.NumChannels() }), - _data(other.Size()) - { - this->_pData = _data.data(); - this->CopyFrom(other); - } - - template - Tensor::Tensor(std::initializer_list>> list) : - TensorRef(TensorShape{ list.size(), list.begin()->size(), list.begin()->begin()->size() }), - _data(list.size() * list.begin()->size() * list.begin()->begin()->size()) - { - this->_pData = _data.data(); - auto numColumns = list.begin()->size(); - auto numChannels = list.begin()->begin()->size(); - DEBUG_USED(numColumns, numChannels); - - size_t i = 0; - for (auto rowIter = list.begin(); rowIter < list.end(); ++rowIter) - { - DEBUG_CHECK_SIZES(rowIter->size() != numColumns, "incorrect number of elements in initializer list"); - - size_t j = 0; - for (auto columnIter = rowIter->begin(); columnIter < rowIter->end(); ++columnIter) - { - DEBUG_CHECK_SIZES(columnIter->size() != numChannels, "incorrect number of elements in initializer list"); - - size_t k = 0; - for (auto channelIter = columnIter->begin(); channelIter < columnIter->end(); ++channelIter) - { - (*this)(i, j, k) = *channelIter; - ++k; - } - ++j; - } - ++i; - } - } - - template - Tensor& Tensor::operator=(Tensor other) - { - Swap(other); - return *this; - } - - template - void Tensor::Swap(Tensor& other) - { - TensorRef::Swap(other); - std::swap(_data, other._data); - } - - template - void TensorArchiver::Write(const Tensor& tensor, const std::string& name, utilities::Archiver& archiver) - { - archiver[GetRowsName(name)] << tensor.NumRows(); - archiver[GetColumnsName(name)] << tensor.NumColumns(); - archiver[GetChannelsName(name)] << tensor.NumChannels(); - archiver[GetValuesName(name)] << tensor.ToArray(); - } - - template - void TensorArchiver::Read(Tensor& tensor, const std::string& name, utilities::Unarchiver& archiver) - { - size_t rows = 0; - size_t columns = 0; - size_t channels = 0; - std::vector values; - - archiver[GetRowsName(name)] >> rows; - archiver[GetColumnsName(name)] >> columns; - archiver[GetChannelsName(name)] >> channels; - archiver[GetValuesName(name)] >> values; - - Tensor value(rows, columns, channels, std::move(values)); - - tensor = std::move(value); - } - -} // namespace math -} // namespace ell diff --git a/libraries/math/tcc/TensorOperations.tcc b/libraries/math/tcc/TensorOperations.tcc deleted file mode 100644 index 16089bc85..000000000 --- a/libraries/math/tcc/TensorOperations.tcc +++ /dev/null @@ -1,221 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: TensorOperations.tcc (math) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include "../include/MatrixOperations.h" - -#include - -namespace ell -{ -namespace math -{ - template - void Print(ConstTensorReference tensor, std::ostream& stream, size_t row, size_t column) - { - stream << "{" << tensor(row, column, 0); - for (size_t k = 1; k < tensor.NumChannels(); ++k) - { - stream << ", " << tensor(row, column, k); - } - stream << "}"; - } - - template - void Print(ConstTensorReference tensor, std::ostream& stream, size_t row) - { - stream << "{ "; - Print(tensor, stream, row, 0); - for (size_t j = 1; j < tensor.NumColumns(); ++j) - { - stream << ", "; - Print(tensor, stream, row, j); - } - stream << " }"; - } - - template - void Print(ConstTensorReference tensor, std::ostream& stream) - { - using namespace logging; - - stream << "{ "; - 
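// Editorial note (not part of the deleted file): the Print overloads in this file
// nest as channel -> column -> row, so a tensor renders as braces nested three
// deep. A compact sketch of the same scheme over a plain nested array, assuming
// row/column/channel indexing as in the deleted code; not ELL's actual printer.
#include <iostream>
#include <vector>

using Tensor3 = std::vector<std::vector<std::vector<double>>>; // [row][column][channel]

void PrintChannels(const Tensor3& t, std::ostream& s, size_t i, size_t j)
{
    s << "{" << t[i][j][0];
    for (size_t k = 1; k < t[i][j].size(); ++k) s << ", " << t[i][j][k];
    s << "}";
}

void PrintRow(const Tensor3& t, std::ostream& s, size_t i)
{
    s << "{ ";
    PrintChannels(t, s, i, 0);
    for (size_t j = 1; j < t[i].size(); ++j) { s << ", "; PrintChannels(t, s, i, j); }
    s << " }";
}

void PrintTensor(const Tensor3& t, std::ostream& s)
{
    s << "{ ";
    PrintRow(t, s, 0);
    for (size_t i = 1; i < t.size(); ++i) { s << ",\n  "; PrintRow(t, s, i); }
    s << " }\n";
}

int main()
{
    Tensor3 t{ { { 1, 2 }, { 3, 4 } }, { { 5, 6 }, { 7, 8 } } };
    PrintTensor(t, std::cout); // { { {1, 2}, {3, 4} },\n  { {5, 6}, {7, 8} } }
}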
Print(tensor, stream, 0); - for (size_t i = 1; i < tensor.NumRows(); ++i) - { - stream << "," << EOL << " "; - Print(tensor, stream, i); - } - stream << " }" << EOL; - } - - template - std::ostream& operator<<(std::ostream& stream, ConstTensorReference tensor) - { - Print(tensor, stream); - return stream; - } - - template > - void operator+=(TensorReference tensor, ScalarType scalar) - { - AddUpdate(static_cast(scalar), tensor); - } - - template > - void operator-=(TensorReference tensor, ScalarType scalar) - { - AddUpdate(-static_cast(scalar), tensor); - } - - template > - void operator*=(TensorReference tensor, ScalarType scalar) - { - ScaleUpdate(static_cast(scalar), tensor); - } - - template > - void operator/=(TensorReference tensor, ScalarType scalar) - { - DEBUG_THROW(scalar == 0, utilities::NumericException(utilities::NumericExceptionErrors::divideByZero, "Divide by zero.")); - - ScaleUpdate(static_cast(1.0 / scalar), tensor); - } - - template - void ScaleUpdate(ElementType scalar, TensorReference tensor) - { - for (size_t i = 0; i < tensor.NumPrimarySlices(); ++i) - { - ScaleUpdate(scalar, tensor.GetPrimarySlice(i)); - } - } - - template - void ScaleUpdate(UnorientedConstVectorBase vector, TensorReference tensor) - { - for (size_t i = 0; i < vector.Size(); ++i) - { - math::ScaleUpdate(vector[i], tensor.template GetSlice(i)); - } - } - - template - void ScaleUpdate(UnorientedConstVectorBase vector, TensorReference tensor) - { - for (size_t i = 0; i < vector.Size(); ++i) - { - math::ScaleUpdate(vector[i], tensor.template GetSlice(i)); - } - } - - template - void ScaleUpdate(UnorientedConstVectorBase vector, TensorReference tensor) - { - for (size_t i = 0; i < tensor.GetSize2(); ++i) - { - auto M = tensor.GetPrimarySlice(i); - for (size_t j = 0; j < tensor.GetSize0(); ++j) - { - auto u = M.GetRow(j); - math::ScaleUpdate(vector[j], u); - } - } - } - - template - void AddUpdate(ElementType scalar, TensorReference tensor) - { - for (size_t i = 0; i < tensor.NumPrimarySlices(); ++i) - { - AddUpdate(scalar, tensor.GetPrimarySlice(i)); - } - } - - template - void AddUpdate(UnorientedConstVectorBase vector, TensorReference tensor) - { - DEBUG_CHECK_SIZES(vector.Size() != tensor.GetSize2(), "vector and tensor dimensions must be the same"); - - for (size_t i = 0; i < vector.Size(); ++i) - { - AddUpdate(vector[i], tensor.template GetSlice(i)); - } - } - - template - void AddUpdate(UnorientedConstVectorBase vector, TensorReference tensor) - { - DEBUG_CHECK_SIZES(vector.Size() != tensor.GetSize1(), "vector and tensor dimensions must be the same"); - for (size_t i = 0; i < vector.Size(); ++i) - { - AddUpdate(vector[i], tensor.template GetSlice(i)); - } - } - - template - void AddUpdate(ConstRowVectorReference vector, TensorReference tensor) - { - AddUpdate(vector.Transpose(), tensor); - } - - template - void AddUpdate(ConstColumnVectorReference vector, TensorReference tensor) - { - DEBUG_CHECK_SIZES(vector.Size() != tensor.GetSize0(), "vector and tensor dimensions must be the same"); - - for (size_t i = 0; i < tensor.GetSize2(); ++i) - { - auto M = tensor.GetPrimarySlice(i); - for (size_t j = 0; j < tensor.GetSize1(); ++j) - { - auto u = M.GetColumn(j); - AddUpdate(vector, u); - } - } - } - - template - void ScaleAddUpdate(UnorientedConstVectorBase scale, UnorientedConstVectorBase bias, TensorReference tensor) - { - DEBUG_CHECK_SIZES(scale.Size() != tensor.GetSize2(), "vector and tensor dimensions must be the same"); - for (size_t i = 0; i < scale.Size(); ++i) - { - ScaleAddUpdate(scale[i], 
OnesMatrix(), bias[i], tensor.template GetSlice(i));
-        }
-    }
-
-    template
-    void ScaleAddUpdate(UnorientedConstVectorBase scale, UnorientedConstVectorBase bias, TensorReference tensor)
-    {
-        DEBUG_CHECK_SIZES(scale.Size() != tensor.GetSize1(), "vector and tensor dimensions must be the same");
-        for (size_t i = 0; i < scale.Size(); ++i)
-        {
-            ScaleAddUpdate(scale[i], OnesMatrix(), bias[i], tensor.template GetSlice(i));
-        }
-    }
-
-    template
-    void ScaleAddUpdate(UnorientedConstVectorBase scale, UnorientedConstVectorBase bias, TensorReference tensor)
-    {
-        DEBUG_CHECK_SIZES(scale.Size() != tensor.GetSize0() || bias.Size() != tensor.GetSize0(), "vectors and tensor dimensions must be the same");
-
-        for (size_t i = 0; i < tensor.GetSize2(); ++i)
-        {
-            auto M = tensor.GetPrimarySlice(i);
-            for (size_t j = 0; j < tensor.GetSize1(); ++j)
-            {
-                auto u = M.GetColumn(j);
-                for (size_t k = 0; k < tensor.GetSize0(); ++k)
-                {
-                    u[k] = scale[k] * u[k] + bias[k];
-                }
-            }
-        }
-    }
-} // namespace math
-} // namespace ell
diff --git a/libraries/math/tcc/Vector.tcc b/libraries/math/tcc/Vector.tcc
deleted file mode 100644
index d9fd4760b..000000000
--- a/libraries/math/tcc/Vector.tcc
+++ /dev/null
@@ -1,434 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     Vector.tcc (math)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-#include
-
-namespace ell
-{
-namespace math
-{
-    //
-    // UnorientedConstVectorBase
-    //
-
-    template
-    UnorientedConstVectorBase::UnorientedConstVectorBase(const ElementType* pData, size_t size, size_t increment) :
-        _pData(pData),
-        _size(size),
-        _increment(increment)
-    {
-    }
-
-    template
-    const ElementType& UnorientedConstVectorBase::operator[](size_t index) const
-    {
-        DEBUG_THROW(index >= _size, utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds vector size."));
-
-        return GetConstDataPointer()[index * _increment];
-    }
-
-    template
-    void UnorientedConstVectorBase::Swap(UnorientedConstVectorBase& other)
-    {
-        std::swap(_pData, other._pData);
-        std::swap(_size, other._size);
-        std::swap(_increment, other._increment);
-    }
-
-    template
-    ElementType UnorientedConstVectorBase::Norm0() const
-    {
-        return Aggregate([](ElementType x) { return x != 0 ?
1 : 0; }); - } - - template - ElementType UnorientedConstVectorBase::Norm1() const - { - return Aggregate([](ElementType x) { return std::abs(x); }); - } - - template - ElementType UnorientedConstVectorBase::Norm2() const - { - return std::sqrt(Norm2Squared()); - } - - template - ElementType UnorientedConstVectorBase::Norm2Squared() const - { - return Aggregate([](ElementType x) { return x * x; }); - } - - template - ElementType UnorientedConstVectorBase::NormInfinity() const - { - if (_size == 0) - { - return 0; - } - - const ElementType* pData = GetConstDataPointer(); - const ElementType* pEnd = pData + _size * _increment; - ElementType result = *pData; - pData += _increment; - - while (pData < pEnd) - { - result = std::max(result, std::abs(*pData)); - pData += _increment; - } - - return result; - } - - template - template - ElementType UnorientedConstVectorBase::Aggregate(MapperType mapper) const - { - ElementType result = 0; - const ElementType* current = GetConstDataPointer(); - const ElementType* end = current + _size * _increment; - while (current < end) - { - result += mapper(*current); - current += _increment; - } - return result; - } - - template - std::vector UnorientedConstVectorBase::ToArray() const - { - std::vector result(_size); - - const ElementType* pData = GetConstDataPointer(); - for (size_t i = 0; i < _size; ++i, pData += _increment) - result[i] = *pData; - - return result; - } - - // - // ConstVectorReference - // - - template - ConstVectorReference::ConstVectorReference(const ElementType* pData, size_t size, size_t increment) : - UnorientedConstVectorBase(pData, size, increment) - { - } - - template - void ConstVectorReference::Swap(ConstVectorReference& other) - { - UnorientedConstVectorBase::Swap(other); - } - - template - bool ConstVectorReference::IsEqual(ConstVectorReference other, ElementType tolerance) const - { - if (this->Size() != other.Size()) - { - return false; - } - - const ElementType* pThis = this->GetConstDataPointer(); - const ElementType* pThisEnd = pThis + this->Size() * this->GetIncrement(); - const ElementType* pOther = other.GetConstDataPointer(); - - while (pThis < pThisEnd) - { - auto diff = (*pThis) - (*pOther); - - if (diff > tolerance || -diff > tolerance) - { - return false; - } - pThis += this->GetIncrement(); - pOther += other.GetIncrement(); - } - return true; - } - - template - bool ConstVectorReference::operator==(const ConstVectorReference& other) const - { - return IsEqual(other); - } - - template - bool ConstVectorReference::operator!=(const ConstVectorReference& other) const - { - return !(*this == other); - } - - template - ConstVectorReference ConstVectorReference::GetSubVector(size_t offset, size_t size) const - { - DEBUG_THROW(offset + size > this->Size(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "subvector offset + subvector size exceeds vector size.")); - - return ConstVectorReference(this->GetConstDataPointer() + offset * this->GetIncrement(), size, this->GetIncrement()); - } - - // - // VectorReference - // - - template - VectorReference::VectorReference(const ElementType* pData, size_t size, size_t increment) : - ConstVectorReference(pData, size, increment) - { - } - - template - ElementType& VectorReference::operator[](size_t index) - { - DEBUG_THROW(index >= this->Size(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "index exceeds vector size.")); - - return GetDataPointer()[index * this->GetIncrement()]; - } - - template - void 
VectorReference::Swap(VectorReference& other) - { - ConstVectorReference::Swap(other); - } - - template - template - void VectorReference::CopyFrom(ConstVectorReference other) - { - if (this->Size() != other.Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "this vector and other vector are not the same size."); - } - - ElementType* pData = GetDataPointer(); - const OtherElementType* pOtherData = other.GetConstDataPointer(); - const size_t otherIncrement = other.GetIncrement(); - const OtherElementType* pOtherEnd = pOtherData + otherIncrement * other.Size(); - - if (this->GetIncrement() == 1 && otherIncrement == 1) - { - while (pOtherData < pOtherEnd) - { - (*pData) = static_cast(*pOtherData); - ++pData; - ++pOtherData; - } - } - else - { - while (pOtherData < pOtherEnd) - { - (*pData) = static_cast(*pOtherData); - pData += this->GetIncrement(); - pOtherData += otherIncrement; - } - } - } - - template - void VectorReference::Reset() - { - Fill(0); - } - - template - void VectorReference::Fill(ElementType value) - { - ElementType* data = GetDataPointer(); - ElementType* end = data + this->Size() * this->GetIncrement(); - - if (this->IsContiguous()) - { - std::fill(data, end, value); - } - else - { - while (data < end) - { - *data = value; - data += this->GetIncrement(); - } - } - } - - template - template - void VectorReference::Generate(GeneratorType generator) - { - ElementType* data = GetDataPointer(); - ElementType* end = data + this->Size() * this->GetIncrement(); - - while (data < end) - { - *data = static_cast(generator()); - data += this->GetIncrement(); - } - } - - template - template - void VectorReference::Transform(TransformationType transformation) - { - ElementType* pData = this->GetDataPointer(); - const ElementType* pEnd = pData + this->Size() * this->GetIncrement(); - while (pData < pEnd) - { - *pData = transformation(*pData); - pData += this->GetIncrement(); - } - } - - template - VectorReference VectorReference::GetReference() - { - return VectorReference(GetDataPointer(), this->Size(), this->GetIncrement()); - } - - template - VectorReference VectorReference::GetSubVector(size_t offset, size_t size) - { - DEBUG_THROW(offset + size > this->Size(), utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "subvector offset + subvector size exceeds vector size.")); - - return VectorReference(GetDataPointer() + offset * this->GetIncrement(), size, this->GetIncrement()); - } - - // - // Vector - // - - template - Vector::Vector(size_t size) : - VectorReference(nullptr, size, 1), - _data(size) - { - this->_pData = _data.data(); - } - - template - Vector::Vector(std::vector data) : - VectorReference(nullptr, data.size(), 1), - _data(std::move(data)) - { - this->_pData = _data.data(); - } - - template - Vector::Vector(std::initializer_list list) : - VectorReference(nullptr, list.size(), 1), - _data(list.begin(), list.end()) - { - this->_pData = _data.data(); - } - - template - Vector::Vector(Vector&& other) : - VectorReference(nullptr, other.Size(), other.GetIncrement()), - _data(std::move(other._data)) - { - this->_pData = _data.data(); - } - - template - Vector::Vector(const Vector& other) : - VectorReference(nullptr, other.Size(), 1), - _data(other.Size()) - { - _pData = _data.data(); - this->CopyFrom(other); - } - - template - Vector::Vector(ConstVectorReference& other) : - VectorReference(nullptr, other.Size(), 1), - _data(other.Size()) - { - _pData = _data.data(); - this->CopyFrom(other); - } - - template - 
Vector::Vector(ConstVectorReference::value>& other) : - VectorReference(nullptr, other.Size(), 1), - _data(other.Size()) - { - _pData = _data.data(); - this->CopyFrom(other); - } - - template - void Vector::Resize(size_t size) - { - _data.resize(size); - this->_pData = _data.data(); - this->_size = size; - } - - template - Vector& Vector::operator=(Vector other) - { - Swap(other); - return *this; - } - - template - void Vector::Swap(Vector& other) - { - VectorReference::Swap(other); - std::swap(_data, other._data); - } - - template - utilities::StlStridedIterator::iterator> begin(Vector& vector) - { - return { vector._data.begin(), static_cast(vector.GetIncrement()) }; - } - - template - utilities::StlStridedIterator::const_iterator> begin(const Vector& vector) - { - return { vector._data.cbegin(), static_cast(vector.GetIncrement()) }; - } - - template - utilities::StlStridedIterator::iterator> end(Vector& vector) - { - return { vector._data.end(), static_cast(vector.GetIncrement()) }; - } - - template - utilities::StlStridedIterator::const_iterator> end(const Vector& vector) - { - return { vector._data.cend(), static_cast(vector.GetIncrement()) }; - } - - // - // VectorArchiver - // - template - void VectorArchiver::Write(const Vector& vector, const std::string& name, utilities::Archiver& archiver) - { - archiver[name] << vector.ToArray(); - } - - template - void VectorArchiver::Read(Vector& vector, const std::string& name, utilities::Unarchiver& archiver) - { - std::vector values; - - archiver[name] >> values; - - Vector value(std::move(values)); - - vector.Swap(value); - } -} // namespace math -} // namespace ell diff --git a/libraries/math/tcc/VectorOperations.tcc b/libraries/math/tcc/VectorOperations.tcc deleted file mode 100644 index 314c5d972..000000000 --- a/libraries/math/tcc/VectorOperations.tcc +++ /dev/null @@ -1,832 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: VectorOperations.tcc (math) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -namespace ell -{ -namespace math -{ - template - void Print(ConstVectorReference vector, std::ostream& stream, size_t indent, size_t maxElements) - { - DEBUG_CHECK_SIZES(maxElements < 3, "cannot specify maxElements below 3."); - - stream << std::string(indent, ' '); - if (vector.Size() == 0) - { - stream << "{ }"; - } - else if (vector.Size() <= maxElements) - { - stream << "{ " << vector[0]; - for (size_t i = 1; i < vector.Size(); ++i) - { - stream << ", " << vector[i]; - } - stream << " }"; - } - else - { - stream << "{ " << vector[0]; - for (size_t i = 1; i < maxElements - 2; ++i) - { - stream << ", " << vector[i]; - } - stream << ", ..., " << vector[vector.Size() - 1] << " }"; - } - } - - template - std::ostream& operator<<(std::ostream& stream, ConstVectorReference vector) - { - Print(vector, stream); - return stream; - } - - template - TransformedConstVectorReference::TransformedConstVectorReference(ConstVectorReference vector, TransformationType transformation) : - _vector(vector), - _transformation(std::move(transformation)) - { - } - - template - TransformedConstVectorReference TransformVector(ConstVectorReference vector, TransformationType transformation) - { - return TransformedConstVectorReference(vector, transformation); - } - - template - auto Square(ConstVectorReference vector) -> 
TransformedConstVectorReference> - { - return TransformVector(vector, SquareTransformation); - } - - template - auto Sqrt(ConstVectorReference vector) -> TransformedConstVectorReference> - { - return TransformVector(vector, SquareRootTransformation); - } - - template - auto Abs(ConstVectorReference vector) -> TransformedConstVectorReference> - { - return TransformVector(vector, AbsoluteValueTransformation); - } - - template - ElementType ScaleFunction::operator()(ElementType x) - { - return x * _value; - } - - template - auto operator*(double scalar, ConstVectorReference vector) -> TransformedConstVectorReference> - { - ScaleFunction transformation{ static_cast(scalar) }; - return TransformVector(vector, transformation); - } - - template concept> - void operator+=(VectorReference vector, ScalarElementType scalar) - { - AddUpdate(static_cast(scalar), vector); - } - - template - void operator+=(VectorReference vectorB, ConstVectorReference vectorA) - { - AddUpdate(vectorA, vectorB); - } - - template - void operator+=(VectorReference vector, TransformedConstVectorReference transformedVector) - { - TransformAddUpdate(transformedVector.GetTransformation(), transformedVector.GetVector(), vector); - } - - template concept> - void operator-=(VectorReference vector, ScalarElementType scalar) - { - AddUpdate(static_cast(-scalar), vector); - } - - template - void operator-=(VectorReference vectorB, ConstVectorReference vectorA) - { - ScaleAddUpdate(static_cast(-1), vectorA, One(), vectorB); - } - - template concept> - void operator*=(VectorReference vector, ScalarElementType scalar) - { - ScaleUpdate(static_cast(scalar), vector); - } - - template concept> - void operator/=(VectorReference vector, ScalarElementType scalar) - { - DEBUG_THROW(scalar == 0, utilities::NumericException(utilities::NumericExceptionErrors::divideByZero, "Divide by zero.")); - - ScaleUpdate(1 / static_cast(scalar), vector); - } - - // vector += scalar - template - void AddUpdate(ElementType scalar, VectorReference vector) - { - if (scalar == 0) - { - return; - } - else - { - Internal::VectorOperations::AddUpdate(scalar, vector); - } - } - - // vectorB += vectorA - template - void AddUpdate(ConstVectorReference vectorA, VectorReference vectorB) - { - DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size(), "Incompatible vector sizes."); - - Internal::VectorOperations::AddUpdate(vectorA, vectorB); - } - - // output = scalar + vector - template - void AddSet(ElementType scalar, ConstVectorReference vector, VectorReference output) - { - if (scalar == 0) - { - output.CopyFrom(vector); - } - else - { - Internal::VectorOperations::AddSet(scalar, vector, output); - } - } - - // output = vectorA + vectorB - template - void AddSet(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference output) - { - DEBUG_CHECK_SIZES(vectorA.Size() != vectorB.Size(), "Incompatible vector sizes."); - - Internal::VectorOperations::AddSet(vectorA, vectorB, output); - } - - // vector *= scalar - template - void ScaleUpdate(ElementType scalar, VectorReference vector) - { - if (scalar == 1) - { - return; - } - else if (scalar == 0) - { - vector.Reset(); - } - else - { - Internal::VectorOperations::ScaleUpdate(scalar, vector); - } - } - - // output = scalar * vector - template - void ScaleSet(ElementType scalar, ConstVectorReference vector, VectorReference output) - { - DEBUG_CHECK_SIZES(vector.Size() != output.Size(), "Incompatible vector sizes."); - - if (scalar == 1) - { - output.CopyFrom(vector); - } - else if (scalar == 0) - { - 
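// Editorial note (not part of the deleted file): each wrapper in this file peels
// off the scalar special cases (0 and 1) before reaching a generic kernel, as in
// the branch in progress here. A small sketch of that dispatch pattern, assuming
// a flat std::vector in place of ELL's strided references; names are illustrative.
#include <algorithm>
#include <vector>

// Generic kernel: output[i] = scalar * input[i].
void ScaleSetKernel(double scalar, const std::vector<double>& input, std::vector<double>& output)
{
    for (size_t i = 0; i < input.size(); ++i) output[i] = scalar * input[i];
}

// Wrapper: handle the trivial scalars without invoking the kernel.
void ScaleSet(double scalar, const std::vector<double>& input, std::vector<double>& output)
{
    if (scalar == 1)
    {
        output = input; // plain copy, no multiplies
    }
    else if (scalar == 0)
    {
        std::fill(output.begin(), output.end(), 0.0); // reset, input never read
    }
    else
    {
        ScaleSetKernel(scalar, input, output);
    }
}

int main()
{
    std::vector<double> in{ 1, 2, 3 }, out(3);
    ScaleSet(0.0, in, out); // hits the reset branch
    ScaleSet(2.0, in, out); // hits the kernel: {2, 4, 6}
}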
output.Reset(); - } - else - { - Internal::VectorOperations::ScaleSet(scalar, vector, output); - } - } - - // vectorB += scalarA * vectorA - template - void ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, One, VectorReference vectorB) - { - DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size(), "Incompatible vector sizes."); - - if (scalarA == 0) - { - return; - } - else if (scalarA == 1) - { - AddUpdate(vectorA, vectorB); - } - else - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, vectorA, One(), vectorB); - } - } - - // vectorB = scalarA + scalarB * vectorB - template - void ScaleAddUpdate(ElementType scalarA, OnesVector, ElementType scalarB, VectorReference vectorB) - { - if (scalarA == 0) - { - ScaleUpdate(scalarB, vectorB); - } - else if (scalarB == 0) - { - vectorB.Fill(scalarA); - } - else if (scalarB == 1) - { - Internal::VectorOperations::AddUpdate(scalarA, vectorB); - } - else - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, OnesVector(), scalarB, vectorB); - } - } - - // vectorB = vectorA + scalarB * vectorB - template - void ScaleAddUpdate(One, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) - { - DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size(), "Incompatible vector sizes."); - - if (scalarB == 0) - { - vectorB.CopyFrom(vectorA); - } - else if (scalarB == 1) - { - Internal::VectorOperations::AddUpdate(vectorA, vectorB); - } - else - { - Internal::VectorOperations::ScaleAddUpdate(One(), vectorA, scalarB, vectorB); - } - } - - // vectorB = scalarA * vectorA + scalarB * vectorB - template - void ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) - { - DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size(), "Incompatible vector sizes."); - - if (scalarA == 0) - { - ScaleUpdate(scalarB, vectorB); - } - else if (scalarA == 1) - { - ScaleAddUpdate(One(), vectorA, scalarB, vectorB); - } - else if (scalarB == 0) - { - Internal::VectorOperations::ScaleSet(scalarA, vectorA, vectorB); - } - else if (scalarB == 1) - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, vectorA, One(), vectorB); - } - else - { - Internal::VectorOperations::ScaleAddUpdate(scalarA, vectorA, scalarB, vectorB); - } - } - - // output = scalarA * vectorA + vectorB - template - void ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, One, ConstVectorReference vectorB, VectorReference output) - { - DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size() || vectorA.Size() != output.Size(), "Incompatible vector sizes."); - - if (scalarA == 0) - { - output.CopyFrom(vectorB); - } - else if (scalarA == 1) - { - Internal::VectorOperations::AddSet(vectorA, vectorB, output); - } - else - { - Internal::VectorOperations::ScaleAddSet(scalarA, vectorA, One(), vectorB, output); - } - } - - // output = scalarA + scalarB * vectorB - template - void ScaleAddSet(ElementType scalarA, OnesVector, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) - { - DEBUG_CHECK_SIZES(vectorB.Size() != output.Size(), "Incompatible vector sizes."); - - if (scalarA == 0) - { - ScaleSet(scalarB, vectorB, output); - } - else if (scalarB == 0) - { - output.Fill(scalarA); - } - else if (scalarB == 1) - { - Internal::VectorOperations::AddSet(scalarA, vectorB, output); - } - else - { - Internal::VectorOperations::ScaleAddSet(scalarA, OnesVector(), scalarB, vectorB, output); - } - } - - // output = vectorA + scalarB * vectorB - template - void ScaleAddSet(One, ConstVectorReference vectorA, ElementType 
scalarB, ConstVectorReference vectorB, VectorReference output) - { - DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size() || vectorA.Size() != output.Size(), "Incompatible vector sizes."); - - if (scalarB == 0) - { - output.CopyFrom(vectorA); - } - else if (scalarB == 1) - { - Internal::VectorOperations::AddSet(vectorA, vectorB, output); - } - else - { - Internal::VectorOperations::ScaleAddSet(One(), vectorA, scalarB, vectorB, output); - } - } - - // output = scalarA * vectorA + scalarB * vectorB - template - void ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) - { - DEBUG_CHECK_SIZES(vectorB.Size() != vectorA.Size() || vectorB.Size() != output.Size(), "Incompatible vector sizes."); - - if (scalarA == 0) - { - ScaleSet(scalarB, vectorB, output); - } - else if (scalarA == 1) - { - ScaleAddSet(One(), vectorA, scalarB, vectorB, output); - } - else if (scalarB == 0) - { - Internal::VectorOperations::ScaleSet(scalarA, vectorA, output); - } - else if (scalarB == 1) - { - Internal::VectorOperations::ScaleAddSet(scalarA, vectorA, One(), vectorB, output); - } - else - { - Internal::VectorOperations::ScaleAddSet(scalarA, vectorA, scalarB, vectorB, output); - } - } - - template - void ElementwiseMultiplySet(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference vectorC) - { - DEBUG_CHECK_SIZES(vectorA.Size() != vectorB.Size() || vectorA.Size() != vectorB.Size(), "Incompatible vector sizes."); - - const ElementType* pVectorAData = vectorA.GetConstDataPointer(); - const ElementType* pVectorBData = vectorB.GetConstDataPointer(); - - size_t i = 0; - const ElementType* end = vectorA.GetConstDataPointer() + vectorA.GetIncrement() * vectorA.Size(); - - while (pVectorAData < end) - { - vectorC[i++] = (*pVectorAData) * (*pVectorBData); - pVectorAData += vectorA.GetIncrement(); - pVectorBData += vectorB.GetIncrement(); - } - } - - template - void InnerProduct(ConstRowVectorReference vectorA, ConstColumnVectorReference vectorB, ElementType& result) - { - DEBUG_CHECK_SIZES(vectorA.Size() != vectorB.Size(), "Incompatible vector sizes."); - - Internal::VectorOperations::InnerProduct(vectorA, vectorB, result); - } - - template - ElementType operator*(ConstRowVectorReference vectorA, ConstColumnVectorReference vectorB) - { - ElementType result; - InnerProduct(vectorA, vectorB, result); - return result; - } - - template - ElementType Dot(UnorientedConstVectorBase vectorA, UnorientedConstVectorBase vectorB) - { - ConstRowVectorReference rowVector(vectorA.GetConstDataPointer(), vectorA.Size(), vectorA.GetIncrement()); - ConstColumnVectorReference columnVector(vectorB.GetConstDataPointer(), vectorB.Size(), vectorB.GetIncrement()); - - ElementType result; - InnerProduct(rowVector, columnVector, result); - return result; - } - - template - void OuterProduct(ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) - { - DEBUG_CHECK_SIZES(vectorA.Size() != matrix.NumRows() || vectorB.Size() != matrix.NumColumns(), "Incompatible vector matrix sizes."); - - Internal::VectorOperations::OuterProduct(vectorA, vectorB, matrix); - } - - template - void CumulativeSumUpdate(VectorReference vector) - { - ElementType* pData = vector.GetDataPointer(); - const ElementType* pEnd = pData + vector.GetIncrement() * vector.Size(); - ElementType sum = (*pData); - pData += vector.GetIncrement(); - - while (pData < pEnd) - { - sum += (*pData); - (*pData) = sum; - pData += vector.GetIncrement(); - } - } 
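// Editorial note (not part of the deleted file): CumulativeSumUpdate above and
// ConsecutiveDifferenceUpdate below are inverse in-place passes over a strided
// buffer. A minimal round-trip sketch over a plain array with an explicit
// increment, mirroring the pointer walk in the deleted code; not ELL's API.
#include <cassert>
#include <cstddef>

void CumulativeSum(double* data, size_t size, size_t increment)
{
    double sum = 0;
    for (size_t i = 0; i < size; ++i)
    {
        sum += data[i * increment];
        data[i * increment] = sum;
    }
}

void ConsecutiveDifference(double* data, size_t size, size_t increment)
{
    double previous = 0;
    for (size_t i = 0; i < size; ++i)
    {
        double current = data[i * increment];
        data[i * increment] = current - previous;
        previous = current;
    }
}

int main()
{
    double v[] = { 1, 2, 3, 4 };
    CumulativeSum(v, 4, 1);         // v becomes { 1, 3, 6, 10 }
    ConsecutiveDifference(v, 4, 1); // back to { 1, 2, 3, 4 }
    assert(v[3] == 4);
}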
- - template - void ConsecutiveDifferenceUpdate(VectorReference vector) - { - ElementType* pData = vector.GetDataPointer(); - const ElementType* pEnd = pData + vector.GetIncrement() * vector.Size(); - ElementType previous = (*pData); - pData += vector.GetIncrement(); - - while (pData < pEnd) - { - ElementType sum = (*pData); - (*pData) -= previous; - previous = sum; - pData += vector.GetIncrement(); - } - } - - template - void TransformUpdate(TransformationType transformation, VectorReference vector) - { - vector.Transform(transformation); - } - - template - void TransformSet(TransformationType transformation, ConstVectorReference vector, VectorReference output) - { - DEBUG_CHECK_SIZES(vector.Size() != output.Size(), "Incompatible vector sizes."); - - ElementType* pOutputData = output.GetDataPointer(); - const ElementType* pVectorData = vector.GetConstDataPointer(); - const ElementType* pOutputEnd = pOutputData + output.Size() * output.GetIncrement(); - while (pOutputData < pOutputEnd) - { - *pOutputData = transformation(*pVectorData); - pOutputData += output.GetIncrement(); - pVectorData += vector.GetIncrement(); - } - } - - template - void TransformAddUpdate(TransformationType transformation, ConstVectorReference vectorA, VectorReference vectorB) - { - DEBUG_CHECK_SIZES(vectorA.Size() != vectorB.Size(), "Incompatible vector sizes."); - - ElementType* pVectorBData = vectorB.GetDataPointer(); - const ElementType* pVectorAData = vectorA.GetConstDataPointer(); - const ElementType* pVectorBEnd = pVectorBData + vectorB.Size() * vectorB.GetIncrement(); - while (pVectorBData < pVectorBEnd) - { - *pVectorBData += transformation(*pVectorAData); - pVectorBData += vectorB.GetIncrement(); - pVectorAData += vectorA.GetIncrement(); - } - } - - // - // NativeVectorOperations - // - namespace Internal - { - template - void VectorOperations::InnerProduct(ConstRowVectorReference vectorA, ConstColumnVectorReference vectorB, ElementType& result) - { - const ElementType* pVectorAData = vectorA.GetConstDataPointer(); - const ElementType* pVectorBData = vectorB.GetConstDataPointer(); - const ElementType* pVectorAEnd = pVectorAData + vectorA.GetIncrement() * vectorA.Size(); - result = 0; - - while (pVectorAData < pVectorAEnd) - { - result += (*pVectorAData) * (*pVectorBData); - pVectorAData += vectorA.GetIncrement(); - pVectorBData += vectorB.GetIncrement(); - } - } - - template - void VectorOperations::OuterProduct(ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) - { - for (size_t i = 0; i < matrix.NumRows(); ++i) - { - for (size_t j = 0; j < matrix.NumColumns(); ++j) - { - matrix(i, j) = vectorA[i] * vectorB[j]; - } - } - } - - template - void UnaryVectorUpdateImplementation(VectorReference vector, BinaryOperation unaryOperation) - { - ElementType* pData = vector.GetDataPointer(); - const ElementType* pEnd = pData + vector.GetIncrement() * vector.Size(); - - while (pData < pEnd) - { - unaryOperation(*pData); - pData += vector.GetIncrement(); - } - } - - template - void BinaryVectorUpdateImplementation(ConstVectorReference vectorA, VectorReference vectorB, BinaryOperation binaryOperation) - { - ElementType* pVectorBData = vectorB.GetDataPointer(); - const ElementType* pVectorAData = vectorA.GetConstDataPointer(); - const ElementType* pVectorBEnd = pVectorBData + vectorB.GetIncrement() * vectorB.Size(); - - while (pVectorBData < pVectorBEnd) - { - binaryOperation(*pVectorAData, *pVectorBData); - pVectorBData += vectorB.GetIncrement(); - pVectorAData += 
vectorA.GetIncrement(); - } - } - - template - void TrinaryVectorUpdateImplementation(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference output, TrinaryOperation trinaryOperation) - { - ElementType* pOutputData = output.GetDataPointer(); - const ElementType* pVectorAData = vectorA.GetConstDataPointer(); - const ElementType* pVectorBData = vectorB.GetConstDataPointer(); - const ElementType* pOutputEnd = pOutputData + output.GetIncrement() * output.Size(); - - while (pOutputData < pOutputEnd) - { - trinaryOperation(*pVectorAData, *pVectorBData, *pOutputData); - pVectorAData += vectorA.GetIncrement(); - pVectorBData += vectorB.GetIncrement(); - pOutputData += output.GetIncrement(); - } - } - - template - void VectorOperations::AddUpdate(ElementType scalar, VectorReference vector) - { - UnaryVectorUpdateImplementation(vector, [scalar](ElementType& v) { v += scalar; }); - } - - // vectorB += vectorA - template - void VectorOperations::AddUpdate(ConstVectorReference vectorA, VectorReference vectorB) - { - BinaryVectorUpdateImplementation(vectorA, vectorB, [](ElementType a, ElementType& b) { b += a; }); - } - - // output = scalar + vector - template - void VectorOperations::AddSet(ElementType scalar, ConstVectorReference vector, VectorReference output) - { - BinaryVectorUpdateImplementation(vector, output, [scalar](ElementType a, ElementType& o) { o = scalar + a; }); - } - - // output = vectorA + vectorB - template - void VectorOperations::AddSet(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference output) - { - TrinaryVectorUpdateImplementation(vectorA, vectorB, output, [](ElementType a, ElementType b, ElementType& o) { o = a + b; }); - } - - // vector *= scalar - template - void VectorOperations::ScaleUpdate(ElementType scalar, VectorReference vector) - { - UnaryVectorUpdateImplementation(vector, [scalar](ElementType& v) { v *= scalar; }); - } - - // output = scalar * vector - template - void VectorOperations::ScaleSet(ElementType scalar, ConstVectorReference vector, VectorReference output) - { - BinaryVectorUpdateImplementation(vector, output, [scalar](ElementType a, ElementType& o) { o = scalar * a; }); - } - - // vectorB += scalarA * vectorA - template - void VectorOperations::ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, One, VectorReference vectorB) - { - BinaryVectorUpdateImplementation(vectorA, vectorB, [scalarA](ElementType a, ElementType& b) { b += scalarA * a; }); - } - - // vectorB = scalarA + scalarB * vectorB - template - void VectorOperations::ScaleAddUpdate(ElementType scalarA, OnesVector, ElementType scalarB, VectorReference vectorB) - { - UnaryVectorUpdateImplementation(vectorB, [scalarA, scalarB](ElementType& b) { b = scalarA + scalarB * b; }); - } - - // vectorB = vectorA + scalarB * vectorB - template - void VectorOperations::ScaleAddUpdate(One, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) - { - BinaryVectorUpdateImplementation(vectorA, vectorB, [scalarB](ElementType a, ElementType& b) { b = a + scalarB * b; }); - } - - // vectorB = scalarA * vectorA + scalarB * vectorB - template - void VectorOperations::ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) - { - BinaryVectorUpdateImplementation(vectorA, vectorB, [scalarA, scalarB](ElementType a, ElementType& b) { b = scalarA * a + scalarB * b; }); - }
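Every native kernel above reduces to one of three strided loops (unary, binary, trinary) parameterized by a lambda; the increment is the pointer stride, so the same loop also covers non-contiguous views such as matrix columns. A standalone sketch of the binary variant, outside the library's types:

    #include <cstddef>

    template <typename ElementType, typename BinaryOperation>
    void BinaryVectorUpdate(const ElementType* a, std::size_t aIncrement,
                            ElementType* b, std::size_t bIncrement,
                            std::size_t size, BinaryOperation op)
    {
        const ElementType* bEnd = b + bIncrement * size;
        while (b < bEnd)
        {
            op(*a, *b); // e.g. [](double x, double& y) { y += x; } for AddUpdate
            a += aIncrement;
            b += bIncrement;
        }
    }

Passing aIncrement = 1 walks a dense vector; a larger increment walks, say, one row of a column-major matrix.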
- // output = scalarA * vectorA + vectorB - template - void VectorOperations::ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, One, ConstVectorReference vectorB, VectorReference output) - { - TrinaryVectorUpdateImplementation(vectorA, vectorB, output, [scalarA](ElementType a, ElementType b, ElementType& o) { o = scalarA * a + b; }); - } - - // output = scalarA * ones + scalarB * vectorB - template - void VectorOperations::ScaleAddSet(ElementType scalarA, OnesVector, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) - { - BinaryVectorUpdateImplementation(vectorB, output, [scalarA, scalarB](ElementType b, ElementType& o) { o = scalarA + scalarB * b; }); - } - - // output = vectorA + scalarB * vectorB - template - void VectorOperations::ScaleAddSet(One, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) - { - TrinaryVectorUpdateImplementation(vectorA, vectorB, output, [scalarB](ElementType a, ElementType b, ElementType& o) { o = a + scalarB * b; }); - } - - // output = scalarA * vectorA + scalarB * vectorB - template - void VectorOperations::ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) - { - TrinaryVectorUpdateImplementation(vectorA, vectorB, output, [scalarA, scalarB](ElementType a, ElementType b, ElementType& o) { o = scalarA * a + scalarB * b; }); - } - -#ifdef USE_BLAS - // - // OpenBlasVectorOperations - // - - template - void VectorOperations::InnerProduct(ConstRowVectorReference vectorA, ConstColumnVectorReference vectorB, ElementType& result) - { - result = Blas::Dot(static_cast(vectorA.Size()), vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetConstDataPointer(), static_cast(vectorB.GetIncrement())); - } - - template - void VectorOperations::OuterProduct(ConstColumnVectorReference vectorA, ConstRowVectorReference vectorB, MatrixReference matrix) - { - matrix.Reset(); - Blas::Ger(matrix.GetLayout(), static_cast(matrix.NumRows()), static_cast(matrix.NumColumns()), static_cast(1.0), vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetConstDataPointer(), static_cast(vectorB.GetIncrement()), matrix.GetDataPointer(), static_cast(matrix.GetIncrement())); - } - - template - void VectorOperations::AddSet(ElementType scalar, ConstVectorReference vector, VectorReference output) - { - output.Fill(scalar); - AddUpdate(vector, output); - } - - template - void VectorOperations::AddSet(ConstVectorReference vectorA, ConstVectorReference vectorB, VectorReference output) - { - output.CopyFrom(vectorA); - AddUpdate(vectorB, output); - } - - template - void VectorOperations::AddUpdate(ElementType scalar, VectorReference vector) - { - UnaryVectorUpdateImplementation(vector, [scalar](ElementType& v) { v += scalar; }); - } - - // vectorB += vectorA - template - void VectorOperations::AddUpdate(ConstVectorReference vectorA, VectorReference vectorB) - { - ScaleAddUpdate(static_cast(1.0), vectorA, One(), vectorB); - } - - // vector *= scalar - template - void VectorOperations::ScaleUpdate(ElementType scalar, VectorReference vector) - { - Blas::Scal(static_cast(vector.Size()), scalar, vector.GetDataPointer(), static_cast(vector.GetIncrement())); - } - - // output = scalar * vector - template - void VectorOperations::ScaleSet(ElementType scalar, ConstVectorReference vector, VectorReference output) - { - ScaleAddUpdate(scalar, vector, static_cast(0.0), output); - }
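The BLAS-backed overloads that follow compose the two-scalar update b = scalarA * a + scalarB * b out of a scal followed by an axpy. Expressed directly against the CBLAS C API (assuming an OpenBLAS-style cblas.h; the library's Blas::Scal and Blas::Axpy wrap calls of this shape):

    #include <cblas.h>

    // b = alpha * a + beta * b, double precision, with explicit strides
    void ScaleAddUpdateBlas(int n, double alpha, const double* a, int aInc,
                            double beta, double* b, int bInc)
    {
        cblas_dscal(n, beta, b, bInc);           // b = beta * b
        cblas_daxpy(n, alpha, a, aInc, b, bInc); // b = alpha * a + b
    }

The scal must come first; running the axpy first would scale the freshly added alpha * a term as well.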
- // vectorB += scalarA * vectorA - template - void VectorOperations::ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, One, VectorReference vectorB) - { - Blas::Axpy(static_cast(vectorB.Size()), scalarA, vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); - } - - // vectorB = scalarA + scalarB * vectorB - template - void VectorOperations::ScaleAddUpdate(ElementType scalarA, OnesVector, ElementType scalarB, VectorReference vectorB) - { - Blas::Scal(static_cast(vectorB.Size()), scalarB, vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); - math::AddUpdate(scalarA, vectorB); - } - - // vectorB = vectorA + scalarB * vectorB - template - void VectorOperations::ScaleAddUpdate(One, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) - { - Blas::Scal(static_cast(vectorB.Size()), scalarB, vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); - AddUpdate(vectorA, vectorB); - } - - // vectorB = scalarA * vectorA + scalarB * vectorB - template - void VectorOperations::ScaleAddUpdate(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, VectorReference vectorB) - { - Blas::Scal(static_cast(vectorB.Size()), scalarB, vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); - Blas::Axpy(static_cast(vectorB.Size()), scalarA, vectorA.GetConstDataPointer(), static_cast(vectorA.GetIncrement()), vectorB.GetDataPointer(), static_cast(vectorB.GetIncrement())); - } - - // output = scalarA * vectorA + vectorB - template - void VectorOperations::ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, One, ConstVectorReference vectorB, VectorReference output) - { - output.CopyFrom(vectorB); - ScaleAddUpdate(scalarA, vectorA, One(), output); - } - - // output = scalarA * ones + scalarB * vectorB - template - void VectorOperations::ScaleAddSet(ElementType scalarA, OnesVector, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) - { - output.Fill(scalarA); - ScaleAddUpdate(scalarB, vectorB, One(), output); - } - - // output = vectorA + scalarB * vectorB - template - void VectorOperations::ScaleAddSet(One, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) - { - ScaleSet(scalarB, vectorB, output); - AddUpdate(vectorA, output); - } - - // output = scalarA * vectorA + scalarB * vectorB - template - void VectorOperations::ScaleAddSet(ElementType scalarA, ConstVectorReference vectorA, ElementType scalarB, ConstVectorReference vectorB, VectorReference output) - { - ScaleSet(scalarA, vectorA, output); - ScaleAddUpdate(scalarB, vectorB, One(), output); - } - -#endif // USE_BLAS - } // namespace Internal -} // namespace math -} // namespace ell diff --git a/libraries/math/test/include/Matrix_test.h b/libraries/math/test/include/Matrix_test.h index e129373f4..8b92aab4b 100644 --- a/libraries/math/test/include/Matrix_test.h +++ b/libraries/math/test/include/Matrix_test.h @@ -208,4 +208,1435 @@ void TestMatrixColumnwiseConsecutiveDifferenceUpdate(); template void TestMatrixArchiver(); -#include "../tcc/Matrix_test.tcc" +#pragma region implementation + +template +void TestMatrixNumRows() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + auto N = M.GetSubMatrix(0, 1, 2, 2); + + testing::ProcessTest("Matrix::NumRows", M.NumRows() == 3 && N.NumRows() == 2); +} + +template +void TestMatrixNumColumns() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + auto N = M.GetSubMatrix(0, 1, 2, 2); + 
testing::ProcessTest("Matrix::NumColumns", M.NumColumns() == 4 && N.NumColumns() == 2); +} + +template +void TestMatrixSize() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + auto N = M.GetSubMatrix(0, 1, 2, 2); + + testing::ProcessTest("Matrix::Size", M.Size() == 12 && N.Size() == 4); +} + +template +void TestMatrixGetIncrement() +{ + math::ColumnMatrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + math::RowMatrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + testing::ProcessTest("Matrix::GetIncrement", M.GetIncrement() == 3 && N.GetIncrement() == 4); +} + +template +void TestMatrixGetMinorSize() +{ + math::ColumnMatrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + math::RowMatrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + testing::ProcessTest("Matrix::GetMinorSize", M.GetMinorSize() == 4 && N.GetMinorSize() == 3); +} + +template +void TestMatrixGetMajorSize() +{ + math::ColumnMatrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + math::RowMatrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + testing::ProcessTest("Matrix::GetMajorSize", M.GetMajorSize() == 3 && N.GetMajorSize() == 4); +} + +template +void TestMatrixGetRowIncrement() +{ + math::ColumnMatrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + math::RowMatrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + testing::ProcessTest("Matrix::GetRowIncrement", M.GetRowIncrement() == 1 && N.GetRowIncrement() == 4); +} + +template +void TestMatrixGetColumnIncrement() +{ + math::ColumnMatrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + math::RowMatrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + testing::ProcessTest("Matrix::GetColumnIncrement", M.GetColumnIncrement() == 3 && N.GetColumnIncrement() == 1); +} + +template +void TestMatrixIndexer() +{ + math::Matrix M(3, 4); + M(0, 0) = 1; + M(0, 2) = 4; + M(2, 3) = 7; + + auto N = M.GetSubMatrix(0, 1, 2, 2); + N(1, 0) = 3; + + math::ColumnMatrix R{ + { 1, 0, 4, 0 }, + { 0, 3, 0, 0 }, + { 0, 0, 0, 7 } + }; + + testing::ProcessTest("Matrix::Operator()", M == R); +} + +template +void TestMatrixGetDataPointer() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + auto S = M.GetSubMatrix(1, 1, 2, 2); + + testing::ProcessTest("Matrix::GetDataPointer", M.GetDataPointer() == &(M(0, 0)) && S.GetDataPointer() == &(M(1, 1)) && M.GetConstDataPointer() == &(M(0, 0)) && S.GetConstDataPointer() == &(M(1, 1))); +} + +template +void TestMatrixGetLayout() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + auto N = M.GetSubMatrix(0, 1, 2, 2); + + testing::ProcessTest("Matrix::GetLayout", M.GetLayout() == layout && N.GetLayout() == layout); +} + +template +void TestMatrixIsContiguous() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + auto S = M.GetSubMatrix(1, 1, 1, 2); + + testing::ProcessTest("Matrix::IsContiguous", M.IsContiguous() == true && S.IsContiguous() == false); +} + +template +void TestMatrixToArray() +{ + math::ColumnMatrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::RowMatrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + auto Sm = M.GetSubMatrix(0, 1, 2, 2); + auto Sn = N.GetSubMatrix(0, 1, 2, 2); + + std::vector v{ 1, 0, 0, 0, 4, 0, 0, 7 }; + std::vector u{ 0, 0, 4, 0 }; + std::vector x{ 1, 0, 4, 0, 0, 0, 0, 7 }; + std::vector y{ 0, 4, 0, 0 }; + + testing::ProcessTest("Matrix::ToArray", 
M.ToArray() == v && Sm.ToArray() == u && N.ToArray() == x && Sn.ToArray() == y); +} + +template +void TestMatrixSwap() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix N{ + { 1, 3 }, + { 0, 3 } + }; + + math::Matrix S{ + { 1, 3 }, + { 0, 3 } + }; + + M.Swap(N); + + testing::ProcessTest("Matrix::Swap", M == S); +} + +template +void TestMatrixIsEqual() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix::value> S{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + auto A = M.GetSubMatrix(0, 1, 2, 2); + + math::RowMatrix T{ + { 0, 4 }, + { 0, 0 } + }; + + testing::ProcessTest("Matrix::IsEqual", M.IsEqual(N) && M.IsEqual(S) && A.IsEqual(T) && T.IsEqual(A)); +} + +template +void TestMatrixEqualityOperator() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix::value> S{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + auto A = M.GetSubMatrix(0, 1, 2, 2); + + math::RowMatrix T{ + { 0, 4 }, + { 0, 0 } + }; + + testing::ProcessTest("Matrix::operator==", M == N && M == S && A == T && T == A); +} + +template +void TestMatrixInequalityOperator() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix N{ + { 1, 0, 4 }, + { 0, 0, 0 } + }; + + math::Matrix S{ + { 1, 3, 4, 0 }, + { 0, 0, 0, 7 } + }; + + auto A = M.GetSubMatrix(0, 1, 2, 2); + auto B = M.GetSubMatrix(0, 2, 2, 2); + + testing::ProcessTest("Matrix::operator!=", M != N && M != S && A != B); +} + +template +void TestMatrixGetConstReference() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + auto N = M.GetConstReference(); + auto A = M.GetSubMatrix(0, 1, 2, 2); + auto B = A.GetConstReference(); + + testing::ProcessTest("Matrix::GetConstReference", M == N && A == B); +} + +template +void TestMatrixGetSubMatrix() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + auto N = M.GetSubMatrix(1, 1, 2, 3); + N.Fill(3); + N(0, 1) = 4; + + auto S = N.GetSubMatrix(0, 1, 2, 2); + + math::RowMatrix R{ + { 1, 0, 4, 0 }, + { 0, 3, 4, 3 }, + { 0, 3, 3, 3 } + }; + + math::RowMatrix A{ + { 4, 3 }, + { 3, 3 } + }; + + testing::ProcessTest("Matrix::GetSubMatrix", M == R && S == A); +} + +template +void TestMatrixGetColumn() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 3, 4, 3 }, + { 0, 3, 5, 6 } + }; + + auto u = M.GetColumn(1); + u[0] = 2; + u[1] = 2; + u[2] = 8; + + auto N = M.GetSubMatrix(1, 1, 2, 3); + auto v = N.GetColumn(1); + + math::RowMatrix R{ + { 1, 2, 4, 0 }, + { 0, 2, 4, 3 }, + { 0, 8, 5, 6 } + }; + + math::ColumnVector w{ 4, 5 }; + + testing::ProcessTest("Matrix::GetColumn", M == R && v == w); +} + +template +void TestMatrixGetRow() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 3, 4, 3 }, + { 0, 3, 5, 6 } + }; + + auto u = M.GetRow(1); + u[0] = 2; + u[1] = 2; + u[3] = 8; + + auto N = M.GetSubMatrix(1, 1, 2, 3); + auto v = N.GetRow(1); + + math::RowMatrix R{ + { 1, 0, 4, 0 }, + { 2, 2, 4, 8 }, + { 0, 3, 5, 6 } + }; + + math::RowVector w{ 3, 5, 6 }; + + testing::ProcessTest("Matrix::GetRow", M == R && w == v); +} + +template +void TestMatrixGetDiagonal() +{ + math::Matrix M{ + { 1, 2, 4, 0 }, + { 0, 2, 4, 3 }, + { 0, 8, 5, 6 } + }; + + M.GetDiagonal().Fill(9); + + auto N = M.GetSubMatrix(1, 1, 2, 3); + auto v = N.GetDiagonal(); + + math::RowMatrix R{ + { 9, 2, 4, 0 }, + { 0, 9, 4, 3 }, + { 0, 8, 9, 6 } + }; + + math::ColumnVector u{ 9, 9 }; + + testing::ProcessTest("Matrix::GetDiagonal", M == R && u 
== v); +} + +template +void TestMatrixGetMajorVector() +{ + math::ColumnMatrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::RowMatrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + M.GetMajorVector(1).Fill(8); + N.GetMajorVector(1).Fill(8); + + math::RowMatrix R1{ + { 1, 8, 4, 0 }, + { 0, 8, 0, 7 } + }; + + math::RowMatrix R2{ + { 1, 0, 4, 0 }, + { 8, 8, 8, 8 } + }; + + testing::ProcessTest("Matrix::GetMajorVector", M == R1 && N == R2); +} + +template +void TestMatrixTranspose() +{ + math::Matrix M{ + { 9, 2, 4, 0 }, + { 0, 9, 4, 3 }, + { 0, 8, 9, 6 } + }; + + auto T = M.Transpose(); + auto N = M.GetSubMatrix(1, 1, 2, 2).Transpose(); + + math::RowMatrix R{ + { 9, 0, 0 }, + { 2, 9, 8 }, + { 4, 4, 9 }, + { 0, 3, 6 } + }; + + math::RowMatrix S{ + { 9, 8 }, + { 4, 9 } + }; + + testing::ProcessTest("Matrix::Transpose", T == R && N == S); +} + +template +void TestMatrixCopyFrom() +{ + + math::Matrix M(2, 4); + + math::Matrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix S{ + { 2, 6 }, + { 3, 9 } + }; + + M.CopyFrom(N); + M.GetSubMatrix(0, 2, 2, 2).CopyFrom(S); + + math::RowMatrix R{ + { 1, 0, 2, 6 }, + { 0, 0, 3, 9 } + }; + + testing::ProcessTest("Matrix::CopyFrom", M == R); +} + +template +void TestMatrixReset() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + M.Reset(); + N.GetSubMatrix(0, 1, 2, 2).Reset(); + + math::RowMatrix R(2, 4); + + math::RowMatrix T{ + { 1, 0, 0, 0 }, + { 0, 0, 0, 7 } + }; + + testing::ProcessTest("Matrix::Reset", M == R && N == T); +} + +template +void TestMatrixFill() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + M.Fill(-2); + N.GetSubMatrix(0, 1, 2, 2).Fill(-2); + + math::RowMatrix R{ + { -2, -2, -2, -2 }, + { -2, -2, -2, -2 } + }; + + math::RowMatrix T{ + { 1, -2, -2, 0 }, + { 0, -2, -2, 7 } + }; + + testing::ProcessTest("Matrix::Fill", M == R && N == T); +} + +template +void TestMatrixGenerate() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + M.Generate([]() -> ElementType { return -2; }); + N.GetSubMatrix(0, 1, 2, 2).Generate([]() -> ElementType { return -2; }); + + math::RowMatrix R{ + { -2, -2, -2, -2 }, + { -2, -2, -2, -2 } + }; + + math::RowMatrix T{ + { 1, -2, -2, 0 }, + { 0, -2, -2, 7 } + }; + + testing::ProcessTest("Matrix::Generate", M == R && N == T); +} + +template +void TestMatrixTransform() +{ + math::Matrix M{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + math::Matrix N{ + { 1, 0, 4, 0 }, + { 0, 0, 0, 7 } + }; + + M.Transform([](ElementType x) { return 2 * x; }); + N.GetSubMatrix(0, 1, 2, 2).Transform([](ElementType x) { return 2 * x; }); + + math::RowMatrix R{ + { 2, 0, 8, 0 }, + { 0, 0, 0, 14 } + }; + + math::RowMatrix T{ + { 1, 0, 8, 0 }, + { 0, 0, 0, 7 } + }; + + testing::ProcessTest("Matrix::Transform", M == R && N == T); +} + +template +void TestMatrixCopyCtor() +{ + math::Matrix M1{ + { 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9 } + }; + + math::Matrix M2(M1); + + testing::ProcessTest("Matrix(Matrix)", M1 == M2); +} + +template +void TestMatrixPrint() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + std::stringstream stream1; + math::Print(M, stream1); + auto x = stream1.str(); + + std::stringstream stream2; + math::Print(M.GetSubMatrix(0, 1, 2, 2), stream2); + auto y = stream2.str(); + + 
testing::ProcessTest("Print(Matrix)", x == "{ { 1, 2, 0 },\n { 0, 3, 7 } }\n" && y == "{ { 2, 0 },\n { 3, 7 } }\n"); +} + +template +void TestMatrixPlusEqualsOperatorScalar() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + M += -2; + M.GetSubMatrix(0, 1, 2, 2) += 1; + + math::RowMatrix R{ + { -1, 1, -1 }, + { -2, 2, 6 } + }; + + testing::ProcessTest("Matrix::operator+=(scalar)", M == R); +} + +template +void TestMatrixPlusEqualsOperatorMatrix() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix N{ + { 4, 3, 2 }, + { 1, 2, 1 } + }; + + M += N; + M.GetSubMatrix(0, 1, 2, 2) += N.GetSubMatrix(0, 0, 2, 2); + + math::RowMatrix R{ + { 5, 9, 5 }, + { 1, 6, 10 } + }; + + testing::ProcessTest("Matrix::operator+=(Matrix)", M == R); +} + +template +void TestMatrixMinusEqualsOperatorScalar() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + M -= 2; + M.GetSubMatrix(0, 1, 2, 2) -= (-1); + + math::RowMatrix R{ + { -1, 1, -1 }, + { -2, 2, 6 } + }; + + testing::ProcessTest("Matrix::operator-=(scalar)", M == R); +} + +template +void TestMatrixMinusEqualsOperatorMatrix() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix N{ + { -4, -3, -2 }, + { -1, -2, -1 } + }; + + M -= N; + M.GetSubMatrix(0, 1, 2, 2) -= N.GetSubMatrix(0, 0, 2, 2); + + math::RowMatrix R{ + { 5, 9, 5 }, + { 1, 6, 10 } + }; + testing::ProcessTest("Matrix::operator-=(Matrix)", M == R); +} + +template +void TestMatrixTimesEqualsOperator() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + M *= -1; + M.GetSubMatrix(0, 1, 2, 2) *= 2; + + math::RowMatrix R{ + { -1, -4, 0 }, + { 0, -6, -14 } + }; + + testing::ProcessTest("Matrix::operator*=", M == R); +} + +template +void TestMatrixDivideEqualsOperator() +{ + math::Matrix M{ + { 2, 4, 0 }, + { 0, 6, -8 } + }; + + M /= -2; + M.GetSubMatrix(0, 1, 2, 2) /= 0.5; + + math::RowMatrix R{ + { -1, -4, 0 }, + { 0, -6, 8 } + }; + + testing::ProcessTest("Matrix::operator/=", M == R); +} + +template +void TestMatrixAddUpdateScalar() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::AddUpdate(static_cast(-2), M); + math::AddUpdate(static_cast(1), M.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { -1, 1, -1 }, + { -2, 2, 6 } + }; + + testing::ProcessTest(implementationName + "::AddUpdate(scalar, Matrix)", M == R); +} + +template +void TestMatrixAddUpdateZero() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::AddUpdate(static_cast(0), M); + math::AddUpdate(static_cast(0), M.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + testing::ProcessTest(implementationName + "::AddUpdate(0, Matrix)", M == R); +} + +template +void TestMatrixAddUpdateMatrix() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix A{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix B{ + { 4, 3, 2 }, + { 1, 2, 1 } + }; + + math::AddUpdate(A, B); + math::AddUpdate(A.GetSubMatrix(0, 1, 2, 2), B.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { 5, 7, 2 }, + { 1, 8, 15 } + }; + + testing::ProcessTest(implementationName + "::AddUpdate(Matrix, Matrix)", B == R); +} + +template +void TestMatrixAddSetScalar() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + math::Matrix N(2, 3); + + 
math::AddSet(static_cast(-2), M, N); + math::AddSet(static_cast(1), M.GetSubMatrix(0, 1, 2, 2), N.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { -1, 3, 1 }, + { -2, 4, 8 } + }; + + testing::ProcessTest(implementationName + "::AddSet(scalar, Matrix, Matrix)", N == R); +} + +template +void TestMatrixAddSetZero() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + math::Matrix N(2, 3); + + math::AddSet(static_cast(0), M, N); + math::AddSet(static_cast(0), M.GetSubMatrix(0, 1, 2, 2), N.GetSubMatrix(0, 1, 2, 2)); + + testing::ProcessTest(implementationName + "::AddSet(0, Matrix, Matrix)", M == N); +} + +template +void TestMatrixAddSetMatrix() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix A{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix B{ + { 1, 2, 1 }, + { 0, 1, 6 } + }; + + math::Matrix N(2, 3); + + math::AddSet(A, B, N); + math::AddSet(A.GetSubMatrix(0, 1, 2, 2), B.GetSubMatrix(0, 1, 2, 2), N.GetSubMatrix(0, 1, 2, 2)); + + math::Matrix R{ + { 2, 4, 1 }, + { 0, 4, 13 } + }; + + testing::ProcessTest(implementationName + "::AddSet(Matrix, Matrix, Matrix)", N == R); +} + +template +void TestMatrixScaleUpdate() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::ScaleUpdate(static_cast(-1), M); + math::ScaleUpdate(static_cast(2), M.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { -1, -4, 0 }, + { 0, -6, -14 } + }; + + testing::ProcessTest(implementationName + "::ScaleUpdate(scalar, Matrix)", M == R); +} + +template +void TestMatrixScaleSet() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix N(2, 3); + + math::ScaleSet(static_cast(0), M, N); + math::ScaleSet(static_cast(-1), M, N); + math::ScaleSet(static_cast(2), M.GetSubMatrix(0, 1, 2, 2), N.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { -1, 4, 0 }, + { 0, 6, 14 } + }; + + testing::ProcessTest(implementationName + "::ScaleSet(scalar, Matrix, Matrix)", N == R); +} + +template +void TestMatrixScaleAddUpdateScalarMatrixOne() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { -1, 0, 0 }, + { -1, 1, 3 } + }; + + math::Matrix N{ + { 1, 2, 1 }, + { 0, -3, 4 } + }; + + math::ScaleAddUpdate(static_cast(0), M, math::One(), N); + math::ScaleAddUpdate(static_cast(1), M, math::One(), N); + math::ScaleAddUpdate(static_cast(-2), M.GetSubMatrix(0, 1, 2, 2), math::One(), N.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { 0, 2, 1 }, + { -1, -4, 1 } + }; + + testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Matrix, one, Matrix)", N == R); +} + +template +void TestMatrixScaleAddUpdateScalarOnesMatrix() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::ScaleAddUpdate(static_cast(0), math::OnesMatrix(), static_cast(1), M); + math::ScaleAddUpdate(static_cast(-1), math::OnesMatrix(), static_cast(2), M); + math::ScaleAddUpdate(static_cast(1), math::OnesMatrix(), static_cast(-1), M.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { 1, -2, 2 }, + { -1, -4, -12 } + }; + + testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, ones, scalar, Matrix)", M == R); +} + +template +void 
TestMatrixScaleAddUpdateOneMatrixScalar() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, -1 }, + { -1, 3, 7 } + }; + + math::Matrix N{ + { 1, 0, 0 }, + { 0, 3, 7 } + }; + + math::ScaleAddUpdate(math::One(), M, static_cast(0), N); + math::ScaleAddUpdate(math::One(), M, static_cast(-1), N); + math::ScaleAddUpdate(math::One(), M.GetSubMatrix(0, 1, 2, 2), static_cast(-1), N.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { 0, 2, -1 }, + { 0, 3, 7 } + }; + + testing::ProcessTest(implementationName + "::ScaleAddUpdate(one, Matrix, scalar, Matrix)", N == R); +} + +template +void TestMatrixScaleAddUpdateScalarMatrixScalar() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, -2, 0 }, + { 0, 3, 2 } + }; + + math::Matrix N{ + { -1, 2, 0 }, + { 0, -3, 7 } + }; + + math::ScaleAddUpdate(static_cast(0), M, static_cast(1), N); + math::ScaleAddUpdate(static_cast(1), M, static_cast(-1), N); + math::ScaleAddUpdate(static_cast(2), M.GetSubMatrix(0, 1, 2, 2), static_cast(2), N.GetSubMatrix(0, 1, 2, 2)); + + math::RowMatrix R{ + { 2, -12, 0 }, + { 0, 18, -6 } + }; + + testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Matrix, scalar, Matrix)", N == R); +} + +template +void TestMatrixScaleAddSetScalarMatrixOne() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix N{ + { -1, 1, 3 }, + { 1, 1, 2 } + }; + + math::Matrix O(2, 3); + + math::ScaleAddSet(static_cast(-1), M, math::One(), N, O); + + math::RowMatrix R{ + { -2, -1, 3 }, + { 1, -2, -5 } + }; + + testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Matrix, one, Matrix, Matrix)", O == R); +} + +template +void TestMatrixScaleAddSetOneMatrixScalar() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix N{ + { -2, 0, 1 }, + { 2, 1, 0 } + }; + + math::Matrix O(2, 3); + + math::ScaleAddSet(math::One(), M, static_cast(-1), N, O); + + math::RowMatrix R{ + { 3, 2, -1 }, + { -2, 2, 7 } + }; + + testing::ProcessTest(implementationName + "::ScaleAddSet(one, Matrix, scalar, Matrix, Matrix)", O == R); +} + +template +void TestMatrixScaleAddSetScalarMatrixScalar() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix N{ + { 1, -1, 2 }, + { 2, -1, 0 } + }; + + math::Matrix O(2, 3); + + math::ScaleAddSet(static_cast(2), M, static_cast(-1), N, O); + + math::RowMatrix R{ + { 1, 5, -2 }, + { -2, 7, 14 } + }; + + testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Matrix, scalar, Matrix, Matrix)", O == R); +} + +template +void TestMatrixRowwiseSum() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::ColumnVector v(2); + math::RowwiseSum(M, v); + + math::ColumnVector u{ 3, 10 }; + + testing::ProcessTest("RowwiseSum(Matrix, Vector)", v == u); +} + +template +void TestMatrixColumnwiseSum() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::RowVector v(3); + math::ColumnwiseSum(M, v); + + math::RowVector u{ 1, 5, 7 }; + + testing::ProcessTest("ColumnwiseSum(Matrix, Vector)", v == u); +} + 
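The next tests exercise MultiplyScaleAddUpdate, i.e. the GEMV/GEMM contract u = s * M * v + t * u. For the values used below: M * v = (3, 4, 14), so u = 2 * (3, 4, 14) + 3 * (1, 1, 0) = (9, 11, 28), which is the expected vector r. A sketch of the same operation through the CBLAS interface (assuming a dense row-major M; the library dispatches by layout):

    #include <cblas.h>

    // u = s * M * v + t * u for an m-by-n row-major matrix M
    void MultiplyScaleAddUpdate(int m, int n, double s, const double* M,
                                const double* v, double t, double* u)
    {
        cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, s, M, /*lda*/ n, v, 1, t, u, 1);
    }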
+template +void TestMatrixVectorMultiplyScaleAddUpdate() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 0 }, + { 0, 1 }, + { 2, 2 } + }; + + math::Matrix N{ + { 1, 0, 3, 1 }, + { 0, 1, 0, -1 }, + { 2, 0, 1, 3 }, + { 2, 2, 2, 3 } + }; + + math::ColumnVector u{ 1, 1, 0 }; + math::ColumnVector w{ 1, 1, 0 }; + math::ColumnVector v{ 3, 4 }; + + ElementType s = 2; + ElementType t = 3; + + math::MultiplyScaleAddUpdate(s, M, v, t, u); + math::MultiplyScaleAddUpdate(s, N.GetSubMatrix(1, 1, 3, 2), v, t, w); + + math::ColumnVector r{ 9, 11, 28 }; + + testing::ProcessTest(implementationName + "::MultiplyScaleAddUpdate(scalar, Matrix, Vector, scalar, Vector)", u == r && w == r); +} + +template +void TestVectorMatrixMultiplyScaleAddUpdate() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix M{ + { 1, 0 }, + { 0, 1 }, + { 2, 2 } + }; + + math::Matrix N{ + { 1, 0, 3, 1 }, + { 0, 1, 0, -1 }, + { 2, 0, 1, 3 }, + { 2, 2, 2, 3 } + }; + + math::RowVector u{ 1, 1, 0 }; + math::RowVector v{ 3, 4 }; + math::RowVector w{ 3, 4 }; + + ElementType s = 2; + ElementType t = 3; + + math::MultiplyScaleAddUpdate(s, u, M, t, v); + math::MultiplyScaleAddUpdate(s, u, N.GetSubMatrix(1, 1, 3, 2), t, w); + + math::RowVector r{ 11, 14 }; + + testing::ProcessTest(implementationName + "::MultiplyScaleAddUpdate(scalar, Vector, Matrix, scalar, Vector)", v == r && w == r); +} + +template +void TestMatrixMatrixMultiplyScaleAddUpdate() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + math::Matrix A{ + { 1, 2 }, + { 3, 1 }, + { 2, 0 } + }; + + // a padded version of A + math::Matrix AA{ + { 1, 1, 1, 1 }, + { 1, 1, 2, 1 }, + { 1, 3, 1, 1 }, + { 1, 2, 0, 1 }, + { 1, 1, 1, 1 } + }; + + math::Matrix B{ + { 3, 4, 5, 6 }, + { 8, 9, 10, 11 } + }; + + // a padded version of B + math::Matrix BB{ + { 1, 1, 1, 1, 1, 1 }, + { 1, 3, 4, 5, 6, 1 }, + { 1, 8, 9, 10, 11, 1 }, + { 1, 1, 1, 1, 1, 1 } + }; + + math::Matrix C(A.NumRows(), B.NumColumns()); + C.Fill(1); + math::MultiplyScaleAddUpdate(static_cast(1), A, B, static_cast(-1), C); + + math::Matrix CC(A.NumRows() + 2, B.NumColumns() + 2); + CC.Fill(1); + auto CCC = CC.GetSubMatrix(1, 1, 3, 4); + math::MultiplyScaleAddUpdate(static_cast(1), AA.GetSubMatrix(1, 1, 3, 2), BB.GetSubMatrix(1, 1, 2, 4), static_cast(-1), CCC); + + math::Matrix R{ + { 18, 21, 24, 27 }, + { 16, 20, 24, 28 }, + { 5, 7, 9, 11 } + }; + + testing::ProcessTest(implementationName + "::MultiplyScaleAddUpdate(scalar, Matrix, Matrix, scalar, Matrix)", C == R && CCC == R); +} + +template +void TestMatrixElementwiseMultiplySet() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::Matrix N{ + { -1, 1, -1 }, + { 1, 1, 2 } + }; + + math::Matrix C(2, 3); + + math::ElementwiseMultiplySet(M, N, C); + + math::RowMatrix R{ + { -1, 2, 0 }, + { 0, 3, 14 } + }; + + testing::ProcessTest("ElementwiseMultiplySet(Matrix, Matrix, Matrix)", C == R); +} + +template +void TestMatrixRowwiseCumulativeSumUpdate() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::RowwiseCumulativeSumUpdate(M); + + math::Matrix R{ + { 1, 3, 3 }, + { 0, 3, 10 } + }; + + testing::ProcessTest("RowwiseCumulativeSumUpdate(Matrix)", M == R); +} + +template +void TestMatrixColumnwiseCumulativeSumUpdate() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::ColumnwiseCumulativeSumUpdate(M); + + math::Matrix R{ + { 1, 2, 0 }, + { 1, 5, 7 } + }; + + testing::ProcessTest("ColumnwiseCumulativeSumUpdate(Matrix)", M == R); +} + +template +void 
TestMatrixRowwiseConsecutiveDifferenceUpdate() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::RowwiseConsecutiveDifferenceUpdate(M); + + math::Matrix R{ + { 1, 1, -2 }, + { 0, 3, 4 } + }; + + testing::ProcessTest("RowwiseConsecutiveDifferenceUpdate(Matrix)", M == R); +} + +template +void TestMatrixColumnwiseConsecutiveDifferenceUpdate() +{ + math::Matrix M{ + { 1, 2, 0 }, + { 0, 3, 7 } + }; + + math::ColumnwiseConsecutiveDifferenceUpdate(M); + + math::Matrix R{ + { 1, 2, 0 }, + { -1, 1, 7 } + }; + + testing::ProcessTest("ColumnwiseConsecutiveDifferenceUpdate(Matrix)", M == R); +} + +template +void TestMatrixArchiver() +{ + math::Matrix M(3, 4); + M(0, 0) = 1; + M(0, 2) = 4; + M(2, 3) = 7; + + utilities::SerializationContext context; + std::stringstream strstream; + utilities::JsonArchiver archiver(strstream); + + math::MatrixArchiver::Write(M, "test", archiver); + utilities::JsonUnarchiver unarchiver(strstream, context); + + math::Matrix Ma(0, 0); + math::MatrixArchiver::Read(Ma, "test", unarchiver); + + testing::ProcessTest("MatrixArchiver", Ma == M); +} + +#pragma endregion implementation diff --git a/libraries/math/test/include/Tensor_test.h b/libraries/math/test/include/Tensor_test.h index 36d1f8396..95458ce62 100644 --- a/libraries/math/test/include/Tensor_test.h +++ b/libraries/math/test/include/Tensor_test.h @@ -106,4 +106,806 @@ void TestTensorVectorMultiply(); template void TestTensorVectorScaleAddUpdate(); -#include "../tcc/Tensor_test.tcc" +#pragma region implementation + +#include + +#include + +// stl +#include // rand + +template +void TestTensorIndexer() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + auto S = T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }); + + T(1, 2, 3) = 7; + T(0, 1, 2) = 8; + + auto R1 = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 8, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 7 } } + }; + + auto R2 = math::Tensor{ + { { 8, 4 }, { 3, 4 } }, + { { 3, 4 }, { 3, 7 } } + }; + + testing::ProcessTest("Tensor::operator()", T == R1 && S == R2); +} + +template +void TestTensorSize() +{ + math::Tensor T(10, 20, 30); + auto S = T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }); + + testing::ProcessTest("Tensor::Size", T.Size() == 10 * 20 * 30 && S.Size() == 2 * 2 * 2); +} + +template +void TestTensorNumRows() +{ + math::Tensor T(10, 20, 30); + + testing::ProcessTest("Tensor::NumRows", T.NumRows() == 10); +} + +template +void TestTensorNumColumns() +{ + math::Tensor T(10, 20, 30); + + testing::ProcessTest("Tensor::NumColumns", T.NumColumns() == 20); +} + +template +void TestTensorNumChannels() +{ + math::Tensor T(10, 20, 30); + + testing::ProcessTest("Tensor::NumChannels", T.NumChannels() == 30); +} + +template +void TestTensorGetShape() +{ + math::Tensor T(10, 20, 30); + auto shape = T.GetShape(); + + testing::ProcessTest("Tensor::GetShape", shape == math::TensorShape{ 10, 20, 30 }); +} + +template +void TestTensorNumSlices() +{ + math::ColumnRowChannelTensor T(10, 20, 30); + math::ChannelColumnRowTensor S(10, 20, 30); + + testing::ProcessTest("Tensor::NumSlices", + math::NumSlices(T) == 30 && math::NumSlices(T) == 30 && math::NumSlices(T) == 10 && math::NumSlices(T) == 10 && math::NumSlices(S) == 20 && math::NumSlices(S) == 20 && math::NumSlices(S) == 10 && math::NumSlices(S) == 10); + + auto test1DNumSlices = [](auto T) { + testing::ProcessTest("Tensor::NumSlices", + math::NumSlices(T) == (10 * 20) && math::NumSlices(T) == (10 * 30) && 
math::NumSlices(T) == (20 * 30)); + }; + test1DNumSlices(T); + test1DNumSlices(S); +} + +template +void TestTensorNumPrimarySlices() +{ + math::ColumnRowChannelTensor T(10, 20, 30); + math::ChannelColumnRowTensor S(10, 20, 30); + + testing::ProcessTest("Tensor::NumPrimarySlices", T.NumPrimarySlices() == 30 && S.NumPrimarySlices() == 10); +} + +template +void TestTensorIsEqual() +{ + auto S = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + testing::ProcessTest("Tensor::IsEqual", S.IsEqual(T) && T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).IsEqual(S.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }))); +} + +template +void TestTensorEqualityOperator() +{ + auto S = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + testing::ProcessTest("Tensor::operator==", T == S && T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) == S.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 })); +} + +template +void TestTensorInequalityOoperator() +{ + auto S = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 8, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + auto U = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + testing::ProcessTest("Tensor::operator!=", T != S && T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) != S.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) && T != U); +} + +template +void TestTensorGetConstReference() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } + }; + + auto S = T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }); + + testing::ProcessTest("Tensor::operator==", T == T.GetConstReference() && S == S.GetConstReference()); +} + +template +void TestTensorGetSubTensor() +{ + auto T = math::Tensor(4, 6, 8); + auto subT = T.GetSubTensor({ 1, 2, 3 }, { 2, 3, 4 }); + subT.Fill(1); + + auto S = math::Tensor(4, 6, 8); + for (size_t i = 1; i < 3; ++i) + { + for (size_t j = 2; j < 5; ++j) + { + for (size_t k = 3; k < 7; ++k) + { + S(i, j, k) = 1; + } + } + } + + testing::ProcessTest("TestGetSubTensor()", T == S); +} + +template +void TestTensorGetSlice() +{ + math::ColumnRowChannelTensor T1(3, 4, 5); + T1(0, 0, 0) = 1; + T1(1, 2, 3) = 2; + T1(0, 3, 3) = 3; + T1(2, 2, 4) = 3; + + auto T1Test2DSlice = [](auto T) { + auto M1 = math::GetSlice(T, 3); + testing::ProcessTest("TensorReference::GetSlice()", M1(2, 1) == 2 && M1(3, 0) == 3); + + auto M2 = math::GetSlice(T, 3); + testing::ProcessTest("TensorReference::GetSlice()", M2(1, 2) == 2 && M2(0, 3) == 3); + + auto M3 = math::GetSlice(T, 0); + testing::ProcessTest("TensorReference::GetSlice()", M3(0, 0) == 1 && M3(3, 3) == 3); + + auto M4 = math::GetSlice(T, 0); + testing::ProcessTest("TensorReference::GetSlice()", M4(0, 0) == 1 && M4(3, 3) == 3); + }; + + T1Test2DSlice(T1); + T1Test2DSlice(T1.GetConstReference()); + + math::ChannelColumnRowTensor T2(3, 4, 5); + T2(0, 0, 0) = 1; + T2(1, 2, 3) = 2; + T2(0, 3, 3) = 3; + T2(2, 2, 4) = 4; + + auto T2Test2DSlice = [](auto T) { + auto M1 = 
math::GetSlice(T, 0); + testing::ProcessTest("TensorReference::GetSlice()", M1(0, 0) == 1 && M1(3, 3) == 3); + + auto M2 = math::GetSlice(T, 0); + testing::ProcessTest("TensorReference::GetSlice()", M2(0, 0) == 1 && M2(3, 3) == 3); + + auto M3 = math::GetSlice(T, 2); + testing::ProcessTest("TensorReference::GetSlice()", M3(1, 3) == 2 && M3(2, 4) == 4); + + auto M4 = math::GetSlice(T, 2); + testing::ProcessTest("TensorReference::GetSlice()", M4(3, 1) == 2 && M4(4, 2) == 4); + }; + + T2Test2DSlice(T2); + T2Test2DSlice(T2.GetConstReference()); + + auto vectorSliceTest = [](auto _) { + using TensorType = decltype(_); + + // T = numpy.arange(5 * 7 * 11).reshape(5, 7, 11) + TensorType T(5, 7, 11); + for (unsigned i = 0; i < 5; ++i) + { + for (unsigned j = 0; j < 7; ++j) + { + for (unsigned k = 0; k < 11; ++k) + { + T(i, j, k) = static_cast(k + j * 11 + i * 77); + } + } + } + + auto test1DGetSlice = [](auto T) { + // equivalent of NumPy's T[4, 6, ...] + auto V1 = math::GetSlice(T, 4, 6); + testing::ProcessTest("TensorReference::GetSlice()", V1 == math::ColumnVector({ 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384 })); + + // equivalent of NumPy's T[4, ..., 8] + auto V2 = math::GetSlice(T, 4, 8); + testing::ProcessTest("TensorReference::GetSlice()", V2 == math::ColumnVector({ 316, 327, 338, 349, 360, 371, 382 })); + + // equivalent of NumPy's T[..., 6, 8] + auto V3 = math::GetSlice(T, 6, 8); + testing::ProcessTest("TensorReference::GetSlice()", V3 == math::ColumnVector({ 74, 151, 228, 305, 382 })); + }; + + test1DGetSlice(T); + test1DGetSlice(T.GetConstReference()); + + typename TensorType::TensorElementType originalElementVal = 0; + + // T[..., 6, 8][0] = 0 + auto V1 = math::GetSlice(T, 4, 6); + std::swap(originalElementVal, V1[0]); + testing::ProcessTest("TensorReference::GetSlice() after modification", V1 == math::ColumnVector({ 0, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384 })); + testing::ProcessTest("T(4, 6, 0) == 0", T(4, 6, 0) == 0); + std::swap(originalElementVal, V1[0]); + + // T[4..., 8][0] = 0 + auto V2 = math::GetSlice(T, 4, 8); + std::swap(originalElementVal, V2[0]); + testing::ProcessTest("TensorReference::GetSlice() after modification", V2 == math::ColumnVector({ 0, 327, 338, 349, 360, 371, 382 })); + testing::ProcessTest("T(4, 0, 8) == 0", T(4, 0, 8) == 0); + std::swap(originalElementVal, V2[0]); + + // T[4, 6, ...][0] = 0 + auto V3 = math::GetSlice(T, 6, 8); + std::swap(originalElementVal, V3[0]); + testing::ProcessTest("TensorReference::GetSlice() after modification", V3 == math::ColumnVector({ 0, 151, 228, 305, 382 })); + testing::ProcessTest("T(0, 6, 8) == 0", T(0, 6, 8) == 0); + std::swap(originalElementVal, V3[0]); + }; + + vectorSliceTest(math::ChannelColumnRowTensor{}); + vectorSliceTest(math::ColumnRowChannelTensor{}); +} + +template +void TestTensorGetPrimarySlice() +{} + +template +void TestTensorReferenceAsVector() +{ + math::ChannelColumnRowTensor T(3, 4, 2); + T(0, 0, 0) = 1; + T(0, 0, 1) = 2; + T(0, 1, 0) = 3; + T(0, 1, 1) = 4; + math::ColumnRowChannelTensor S(T); + + auto u = T.ReferenceAsVector(); + auto v = S.ReferenceAsVector(); + + math::RowVector r1{ 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + math::RowVector r2{ 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + testing::ProcessTest("TensorReference::ReferenceAsVector()", u == r1 && v == r2); +} + +template +void TestTensorReferenceAsMatrix() +{ + math::ChannelColumnRowTensor T(3, 4, 2); + T(0, 0, 0) = 1; + T(0, 0, 1) = 2; + T(0, 1, 0) = 
3; + T(0, 1, 1) = 4; + math::ColumnRowChannelTensor S(T); + + auto M = T.ReferenceAsMatrix(); + auto N = S.ReferenceAsMatrix(); + + math::RowMatrix R1{ + { 1, 2, 3, 4, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + math::RowMatrix R2{ + { 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + testing::ProcessTest("TensorReference::ReferenceAsMatrix", M == R1 && N == R2); +} + +template +void TestTensorReferenceAsMatrixCopy() +{ + math::ChannelColumnRowTensor T(2, 4, 1); + float x = 1; + for (size_t i = 0; i < 2; i++) + { + for (size_t j = 0; j < 4; j++) + { + T(i, j, 0) = x++; + } + } + + math::RowMatrix E{ + { 1, 5 }, + { 2, 6 }, + { 3, 7 }, + { 4, 8 } + }; + + auto r = T.GetConstReference(); + + auto result = math::RowMatrix(r.ReferenceAsMatrix().Transpose()); + + testing::ProcessTest("TensorReference::ReferenceAsMatrix.Transpose and copy", result.IsEqual(E)); +} + +template +void TestTensorCopyFrom() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + math::Tensor S(2, 3, 4); + S.CopyFrom(T); + + math::Tensor S2(2, 3, 4); + S2.CopyFrom(T); + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + auto N = math::Tensor{ + { { 5, 6 }, { 9, 0 } }, + { { 4, 5 }, { 7, 8 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).CopyFrom(N); + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 5, 6 }, { 9, 0, 9, 0 } }, + { { 3, 4, 5, 6 }, { 7, 8, 4, 5 }, { 1, 2, 7, 8 } } + }; + + testing::ProcessTest("TensorReference::CopyFrom", S == T && S2 == T && M == R); +} + +template +void TestTensorReset() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + T.Reset(); + + math::Tensor S(2, 3, 4); + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).Reset(); + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 0, 0 }, { 9, 0, 0, 0 } }, + { { 3, 4, 5, 6 }, { 7, 8, 0, 0 }, { 1, 2, 0, 0 } } + }; + + testing::ProcessTest("TensorReference::Reset", S == T && M == R); +} + +template +void TestTensorFill() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + T.Fill(3); + + auto S = math::Tensor{ + { { 3, 3, 3, 3 }, { 3, 3, 3, 3 }, { 3, 3, 3, 3 } }, + { { 3, 3, 3, 3 }, { 3, 3, 3, 3 }, { 3, 3, 3, 3 } } + }; + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).Fill(3); + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 3, 3 }, { 9, 0, 3, 3 } }, + { { 3, 4, 5, 6 }, { 7, 8, 3, 3 }, { 1, 2, 3, 3 } } + }; + + testing::ProcessTest("TensorReference::Fill", S == T && M == R); +} + +template +void TestTensorGenerate() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + T.Generate([]() -> ElementType { return 3; }); + + auto S = math::Tensor{ + { { 3, 3, 3, 3 }, { 3, 3, 3, 3 }, { 3, 3, 3, 3 } }, + { { 3, 3, 3, 3 }, { 3, 3, 3, 3 }, { 3, 3, 3, 3 } } + }; + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 
7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).Generate([]() -> ElementType { return 3; }); + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 3, 3 }, { 9, 0, 3, 3 } }, + { { 3, 4, 5, 6 }, { 7, 8, 3, 3 }, { 1, 2, 3, 3 } } + }; + + testing::ProcessTest("TensorReference::Generate", S == T && M == R); +} + +template +void TestTensorTransform() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + T.Transform([](ElementType x) { return 2 * x; }); + + auto S = math::Tensor{ + { { 2, 4, 6, 8 }, { 10, 12, 14, 16 }, { 18, 0, 2, 4 } }, + { { 6, 8, 10, 12 }, { 14, 16, 18, 0 }, { 2, 4, 6, 8 } } + }; + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).Transform([](ElementType x) { return 2 * x; }); + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 14, 16 }, { 9, 0, 2, 4 } }, + { { 3, 4, 5, 6 }, { 7, 8, 18, 0 }, { 1, 2, 6, 8 } } + }; + + testing::ProcessTest("TensorReference::Transform", S == T && M == R); +} + +template +void TestTensorPlusEqualsOperator() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + T += 2; + + auto S = math::Tensor{ + { { 3, 4, 5, 6 }, { 7, 8, 9, 10 }, { 11, 2, 3, 4 } }, + { { 5, 6, 7, 8 }, { 9, 10, 11, 2 }, { 3, 4, 5, 6 } } + }; + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) += 2; + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 9, 10 }, { 9, 0, 3, 4 } }, + { { 3, 4, 5, 6 }, { 7, 8, 11, 2 }, { 1, 2, 5, 6 } } + }; + + testing::ProcessTest("TensorReference::operator+=", S == T && M == R); +} + +template +void TestTensorMinusEqualsOperator() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + T -= -2; + + auto S = math::Tensor{ + { { 3, 4, 5, 6 }, { 7, 8, 9, 10 }, { 11, 2, 3, 4 } }, + { { 5, 6, 7, 8 }, { 9, 10, 11, 2 }, { 3, 4, 5, 6 } } + }; + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) -= -2; + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 9, 10 }, { 9, 0, 3, 4 } }, + { { 3, 4, 5, 6 }, { 7, 8, 11, 2 }, { 1, 2, 5, 6 } } + }; + + testing::ProcessTest("TensorReference::operator-=", S == T && M == R); +} + +template +void TestTensorTimesEqualsOperator() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + T *= 2; + + auto S = math::Tensor{ + { { 2, 4, 6, 8 }, { 10, 12, 14, 16 }, { 18, 0, 2, 4 } }, + { { 6, 8, 10, 12 }, { 14, 16, 18, 0 }, { 2, 4, 6, 8 } } + }; + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) *= 2; + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 14, 16 }, { 9, 0, 2, 4 } }, + { { 3, 4, 5, 6 }, { 7, 8, 18, 0 }, { 1, 2, 6, 8 } } + }; + + testing::ProcessTest("TensorReference::operator*=", S == T && M == R); +} + +template +void TestTensorDivideEqualsOperator() +{ + auto T = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 
8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + T /= 0.5; + + auto S = math::Tensor{ + { { 2, 4, 6, 8 }, { 10, 12, 14, 16 }, { 18, 0, 2, 4 } }, + { { 6, 8, 10, 12 }, { 14, 16, 18, 0 }, { 2, 4, 6, 8 } } + }; + + auto M = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, + { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } + }; + + M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) /= 0.5; + + auto R = math::Tensor{ + { { 1, 2, 3, 4 }, { 5, 6, 14, 16 }, { 9, 0, 2, 4 } }, + { { 3, 4, 5, 6 }, { 7, 8, 18, 0 }, { 1, 2, 6, 8 } } + }; + + testing::ProcessTest("TensorReference::operator/=", S == T && M == R); +} + +template +void TestTensorVectorAddUpdate() +{ + auto T = math::Tensor(2, 3, 4); + + auto v1 = math::Vector{ 1, 2 }; + math::AddUpdate(v1, T); + auto R1 = math::Tensor{ { { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 } }, + { { 2, 2, 2, 2 }, { 2, 2, 2, 2 }, { 2, 2, 2, 2 } } }; + testing::ProcessTest("void TestTensorVectorAddUpdate()", T == R1); + + T.Fill(0); + auto v2 = math::Vector{ 1, 2, 3 }; + math::AddUpdate(v2, T); + auto R2 = math::Tensor{ { { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 3, 3, 3, 3 } }, + { { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 3, 3, 3, 3 } } }; + testing::ProcessTest("void TestTensorVectorAddUpdate()", T == R2); + + T.Fill(0); + auto v3 = math::Vector{ 1, 2, 3, 4 }; + math::AddUpdate(v3, T); + auto R3 = math::Tensor{ { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } }; + testing::ProcessTest("void TestTensorVectorAddUpdate()", T == R3); + + // subtensors + auto TT = math::Tensor(10, 10, 10); + auto TR = TT.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); + + TR.Fill(0); + math::AddUpdate(v1, TR); + testing::ProcessTest("void TestTensorVectorAddUpdate() with subtensor", TR == R1); + + TR.Fill(0); + math::AddUpdate(v2, TR); + testing::ProcessTest("void TestTensorVectorAddUpdate() with subtensor", TR == R2); + + TR.Fill(0); + math::AddUpdate(v3, TR); + testing::ProcessTest("void TestTensorVectorAddUpdate() with subtensor", TR == R3); +} + +template +void TestTensorVectorMultiply() +{ + auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); + + auto T1 = math::Tensor(2, 3, 4); + T1.Fill(1); + auto v1 = math::Vector{ 1, 2 }; + math::ScaleUpdate(v1, T1); + auto R1 = math::Tensor{ { { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 } }, + { { 2, 2, 2, 2 }, { 2, 2, 2, 2 }, { 2, 2, 2, 2 } } }; + + auto T2 = math::Tensor(2, 3, 4); + T2.Fill(1); + auto v2 = math::Vector{ 1, 2, 3 }; + math::ScaleUpdate(v2, T2); + auto R2 = math::Tensor{ { { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 3, 3, 3, 3 } }, + { { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 3, 3, 3, 3 } } }; + + auto T3 = math::Tensor(2, 3, 4); + T3.Fill(1); + auto v3 = math::Vector{ 1, 2, 3, 4 }; + math::ScaleUpdate(v3, T3); + auto R3 = math::Tensor{ { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, + { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } }; + + // subtensors + auto S1 = math::Tensor(10, 10, 10); + auto M1 = S1.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); + M1.Fill(1); + math::ScaleUpdate(v1, M1); + + auto S2 = math::Tensor(10, 10, 10); + auto M2 = S2.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); + M2.Fill(1); + math::ScaleUpdate(v2, M2); + + auto S3 = math::Tensor(10, 10, 10); + auto M3 = S3.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); + M3.Fill(1); + math::ScaleUpdate(v3, M3); + + testing::ProcessTest(implementationName + "::Multiply(Vector, Tensor)", T1 == R1 && T2 == R2 && T3 == R3 && M1 == R1 && M2 == R2 && M3 == R3); +} + +template +void 
TestTensorVectorScaleAddUpdate() +{ + auto T = math::Tensor(2, 3, 4); + T.Fill(1); + auto s1 = math::Vector{ 1, 2 }; + auto b1 = math::Vector{ 3, 4 }; + math::ScaleAddUpdate(s1, b1, T); + auto R1 = math::Tensor{ { { 4, 4, 4, 4 }, { 4, 4, 4, 4 }, { 4, 4, 4, 4 } }, + { { 6, 6, 6, 6 }, { 6, 6, 6, 6 }, { 6, 6, 6, 6 } } }; + testing::ProcessTest("void TestTensorVectorScaleAddUpdate()", T == R1); + + T.Fill(1); + auto s2 = math::Vector{ 1, 2, 3 }; + auto b2 = math::Vector{ 4, 5, 6 }; + math::ScaleAddUpdate(s2, b2, T); + auto R2 = math::Tensor{ { { 5, 5, 5, 5 }, { 7, 7, 7, 7 }, { 9, 9, 9, 9 } }, + { { 5, 5, 5, 5 }, { 7, 7, 7, 7 }, { 9, 9, 9, 9 } } }; + testing::ProcessTest("void TestTensorVectorScaleAddUpdate()", T == R2); + + T.Fill(1); + auto s3 = math::Vector{ 1, 2, 3, 4 }; + auto b3 = math::Vector{ 1, 1, 2, 2 }; + math::ScaleAddUpdate(s3, b3, T); + auto R3 = math::Tensor{ { { 2, 3, 5, 6 }, { 2, 3, 5, 6 }, { 2, 3, 5, 6 } }, + { { 2, 3, 5, 6 }, { 2, 3, 5, 6 }, { 2, 3, 5, 6 } } }; + testing::ProcessTest("void TestTensorVectorScaleAddUpdate()", T == R3); + + // subtensors + auto TT = math::Tensor(10, 10, 10); + auto TR = TT.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); + + TR.Fill(1); + math::ScaleAddUpdate(s1, b1, TR); + testing::ProcessTest("void TestTensorVectorScaleAddUpdate() with subtensor", TR == R1); + + TR.Fill(1); + math::ScaleAddUpdate(s2, b2, TR); + testing::ProcessTest("void TestTensorVectorScaleAddUpdate() with subtensor", TR == R2); + + TR.Fill(1); + math::ScaleAddUpdate(s3, b3, TR); + testing::ProcessTest("void TestTensorVectorScaleAddUpdate() with subtensor", TR == R3); +} + +template +void TestTensorArchiver() +{ + math::Tensor T(10, 20, 30); + + T(3, 2, 1) = 2.0; + T(4, 3, 2) = 3.0; + T(3, 3, 3) = 4.0; + + utilities::SerializationContext context; + std::stringstream strstream; + utilities::JsonArchiver archiver(strstream); + + math::TensorArchiver::Write(T, "test", archiver); + utilities::JsonUnarchiver unarchiver(strstream, context); + + math::Tensor Ta(0, 0, 0); + math::TensorArchiver::Read(Ta, "test", unarchiver); + testing::ProcessTest("void TestTensorArchiver(), write and read tensor", Ta == T); +} + +#pragma endregion implementation diff --git a/libraries/math/test/include/Vector_test.h b/libraries/math/test/include/Vector_test.h index c90998495..a445dbfd4 100644 --- a/libraries/math/test/include/Vector_test.h +++ b/libraries/math/test/include/Vector_test.h @@ -232,4 +232,1013 @@ void TestVectorConsecutiveDifferenceUpdate(); template void TestVectorArchiver(); -#include "../tcc/Vector_test.tcc" +#pragma region implementation + +#include + +#include + +#include + +#include + +template +void TestVectorIndexer() +{ + math::RowVector v{ 1, 2, 3, 4, 5, 6, 7 }; + auto u = v.GetSubVector(2, 2); + + math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; + math::ColumnMatrix N(M); + auto w = M.GetRow(1); + auto z = N.GetRow(1); + + testing::ProcessTest("Vector::Operator[]", v[0] == 1 && v[1] == 2 && v[6] == 7 && u[0] == 3 && u[1] == 4 && w[0] == 4 && w[1] == 5 && w[2] == 6 && z[0] == 4 && z[1] == 5 && z[2] == 6); +} + +template +void TestVectorSize() +{ + math::RowVector u{}; + math::RowVector v{ 1, 2, 3, 4, 5, 6, 7 }; + auto w = v.GetSubVector(2, 3); + + testing::ProcessTest("Vector::Size", v.Size() == 7 && u.Size() == 0 && w.Size() == 3); +} + 
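GetSubVector returns a view, not a copy: the sub-vector aliases the parent's storage, which is exactly what the GetDataPointer test below verifies (the sub-vector's data pointer is the parent's pointer plus the offset). The relationship, sketched with std::span standing in for the library's vector references (C++20 is an assumption of this sketch only):

    #include <span>
    #include <vector>

    void SubVectorViewExample()
    {
        std::vector<double> data{ 1, 2, 3, 4, 5, 6, 7 };
        std::span<double> v(data);
        std::span<double> u = v.subspan(2, 2); // like v.GetSubVector(2, 2)
        u[0] = 30;                             // writes through: data[2] == 30
        // u.data() == v.data() + 2, mirroring the test that follows
    }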
+
+template <typename ElementType>
+void TestVectorGetIncrement()
+{
+    math::RowVector<ElementType> v{ 1, 2, 3, 4, 5, 6, 7 };
+    auto u = v.GetSubVector(2, 2);
+
+    math::RowMatrix<ElementType> M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } };
+    math::ColumnMatrix<ElementType> N(M);
+    auto w = M.GetRow(1);
+    auto z = N.GetRow(1);
+
+    testing::ProcessTest("Vector::GetIncrement", v.GetIncrement() == 1 && u.GetIncrement() == 1 && w.GetIncrement() == 1 && z.GetIncrement() == 3);
+}
+
+template <typename ElementType>
+void TestVectorNorm0()
+{
+    math::RowVector<ElementType> x{ 0, 1, 0, -2, 0 };
+    auto v = x.GetSubVector(2, 2);
+
+    testing::ProcessTest("Vector::Norm0", x.Norm0() == 2 && v.Norm0() == 1);
+}
+
+template <typename ElementType>
+void TestVectorNorm1()
+{
+    math::RowVector<ElementType> x{ 0, 1, 0, -2, 0 };
+    auto v = x.GetSubVector(2, 2);
+
+    testing::ProcessTest("Vector::Norm1", x.Norm1() == 3 && v.Norm1() == 2);
+}
+
+template <typename ElementType>
+void TestVectorNorm2()
+{
+    math::RowVector<ElementType> x{ 0, 1, 0, -2, 0 };
+    auto v = x.GetSubVector(2, 2);
+
+    testing::ProcessTest("Vector::Norm2", testing::IsEqual(x.Norm2(), static_cast<ElementType>(std::sqrt(5))) && v.Norm2() == 2);
+}
+
+template <typename ElementType>
+void TestVectorNorm2Squared()
+{
+    math::RowVector<ElementType> x{ 0, 1, 0, -2, 0 };
+    auto v = x.GetSubVector(2, 2);
+
+    testing::ProcessTest("Vector::Norm2Squared", x.Norm2Squared() == 5 && v.Norm2Squared() == 4);
+}
+
+template <typename ElementType, math::MatrixLayout layout>
+void TestVectorToArray()
+{
+    std::vector<ElementType> r0{ 41, 47, 53, 59 };
+    std::vector<ElementType> r1{ 15, 25, 23, 33 };
+
+    math::RowVector<ElementType> p(r0);
+    math::ColumnVector<ElementType> q(r1);
+
+    math::Matrix<ElementType, layout> A{
+        { 41, 47, 53, 59 },
+        { 40, 45, 56, 61 },
+        { 15, 25, 23, 33 },
+    };
+    std::vector<ElementType> r(A.GetRow(0).ToArray());
+    std::vector<ElementType> s(A.GetRow(2).ToArray());
+
+    math::Matrix<ElementType, math::TransposeMatrixLayout<layout>::value> B(A);
+    std::vector<ElementType> t(B.GetRow(0).ToArray());
+    std::vector<ElementType> u(B.GetRow(2).ToArray());
+
+    testing::ProcessTest("Vector::ToArray", p.ToArray() == r0 && q.ToArray() == r1 && r == r0 && s == r1 && t == r0 && u == r1);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorEqualityOperator()
+{
+    math::Vector<ElementType, orientation> u{ 1, 2, 3, 4, 5 };
+    math::Vector<ElementType, orientation> v{ 1, 2, 3, 4, 5 };
+
+    testing::ProcessTest("Vector::operator==", u == v);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorInequalityOperator()
+{
+    math::Vector<ElementType, orientation> u{ 1, 2, 3, 4, 5 };
+    math::Vector<ElementType, orientation> v{ 1, 2, 3, 4, 5 };
+    math::Vector<ElementType, orientation> w{ -1, 2, 3, 4, 5 };
+    math::Vector<ElementType, orientation> z{ 1, 2, 3, 4 };
+
+    testing::ProcessTest("Vector::operator!=", u != w && u != v.Transpose() && u != z);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorGetConstReference()
+{
+    math::Vector<ElementType, orientation> u{ 1, 2, 3, 4, 5 };
+    auto v = u.GetConstReference();
+
+    testing::ProcessTest("Vector::GetConstReference", u == v);
+}
+
+template <typename ElementType, math::VectorOrientation orientation, math::MatrixLayout layout>
+void TestVectorGetSubVector()
+{
+    math::Vector<ElementType, orientation> u{ 1, 2, 3, 4, 5 };
+    auto v = u.GetSubVector(2, 2);
+
+    math::Matrix<ElementType, layout> A{
+        { 41, 47, 53, 59 },
+        { 40, 45, 56, 61 },
+        { 15, 25, 23, 33 },
+    };
+    auto w = A.GetColumn(2);
+    auto z = w.GetSubVector(1, 2);
+
+    testing::ProcessTest("Vector::GetSubVector", v[0] == 3 && v[1] == 4 && z[0] == 56 && z[1] == 23);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorTranspose()
+{
+    math::Vector<ElementType, orientation> v{ 1, 2, 3, 4, 5, 6, 7 };
+    auto u = v.Transpose();
+    math::Vector<ElementType, math::TransposeVectorOrientation<orientation>::value> w{ 1, 2, 3, 4, 5, 6, 7 };
+
+    auto x = v.GetSubVector(2, 3).Transpose();
+    math::Vector<ElementType, math::TransposeVectorOrientation<orientation>::value> z{ 3, 4, 5 };
+
+    testing::ProcessTest("Vector::Transpose", u == w && x == z);
+}
+
+template <typename ElementType>
+void TestVectorSwap()
+{
+    math::RowVector<ElementType> v{ 1, 2, 3, 4, 5, 6, 7 };
+    math::RowVector<ElementType> u{ -1, -2, -3, -4, -5, -6, -7 };
+    math::RowVector<ElementType> s{ -1, -2, -3, -4, -5, -6, -7 };
+    math::RowVector<ElementType> t{ 1, 2, 3, 4, 5, 6, 7 };
+    v.Swap(u);
+
+    testing::ProcessTest("Vector::Swap", v == s && u == t);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorCopyFrom()
+{
+    math::Vector<ElementType, orientation> v{ 1, 2, 3, 4, 5, 6, 7
}; + math::Vector u(7); + u.CopyFrom(v); + + math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; + math::ColumnVector x{ 11, 12, 13 }; + M.GetColumn(1).CopyFrom(x); + math::RowMatrix R{ { 1, 11, 3 }, { 4, 12, 6 }, { 7, 13, 9 } }; + + testing::ProcessTest("Vector::CopyFrom", u == v && M == R); +} + +template +void TestVectorReset() +{ + math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; + v.GetSubVector(1, 2).Reset(); + math::Vector r{ 1, 0, 0, 4, 5, 6, 7 }; + + math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; + M.GetColumn(1).Reset(); + M.GetRow(1).Reset(); + math::RowMatrix R{ { 1, 0, 3 }, { 0, 0, 0 }, { 7, 0, 9 } }; + + testing::ProcessTest("Vector::Reset", v == r && M == R); +} + +template +void TestVectorFill() +{ + math::Vector v(10); + v.Fill(2); + math::Vector r{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; + + math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; + M.GetColumn(1).Fill(-1); + M.GetRow(1).Fill(1); + math::RowMatrix R{ { 1, -1, 3 }, { 1, 1, 1 }, { 7, -1, 9 } }; + + testing::ProcessTest("Vector::Fill", v == r && M == R); +} + +template +void TestVectorGenerate() +{ + math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; + v.GetSubVector(1, 2).Generate([]() -> ElementType { return -1; }); + math::Vector r{ 1, -1, -1, 4, 5, 6, 7 }; + + math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; + M.GetColumn(1).Generate([]() -> ElementType { return -1.0; }); + M.GetRow(1).Generate([]() -> ElementType { return 1.0; }); + math::RowMatrix R{ { 1, -1, 3 }, { 1, 1, 1 }, { 7, -1, 9 } }; + + testing::ProcessTest("Vector::Generate", v == r && M == R); +} + +template +void TestVectorTransform() +{ + math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; + v.Transform([](ElementType value) { return value * 2; }); + math::Vector u{ 2, 4, 6, 8, 10, 12, 14 }; + + math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; + M.GetColumn(1).Transform([](ElementType value) { return value * 2; }); + math::ColumnVector w{ 4, 10, 16 }; + + math::Vector z{ 1, -2, 3, -4, -5, 6, -7 }; + z.Transform(math::AbsoluteValueTransformation); + math::Vector y{ 1, 2, 3, 4, 5, 6, 7 }; + + testing::ProcessTest("Vector::Transform", v == u && M.GetColumn(1) == w && z == y); +} + +template +void TestVectorResize() +{ + math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; + v.Resize(3); + math::Vector r{ 1, 2, 3 }; + + math::Vector u{ 1, 2, 3, 4, 5, 6, 7 }; + u.Resize(10); + math::Vector s{ 1, 2, 3, 4, 5, 6, 7, 0, 0, 0 }; + + testing::ProcessTest("Vector::Resize", v.Size() == 3 && v == r && u == s); +} + +template +void TestVectorPrint() +{ + std::stringstream stream; + math::Vector u{ 0, 2, 0, 4, 0, 0, 0 }; + math::Print(u, stream); + auto x = stream.str(); + + testing::ProcessTest("Print(Vector)", stream.str() == "{ 0, 2, 0, 4, 0, 0, 0 }"); +} + +template +void TestScalarVectorMultiply() +{ + math::Vector u{ 1, 2, 3, 4, 5 }; + math::Vector v{ 2, 0, -1, 0, 1 }; + u += 2 * v; + math::Vector r{ 5, 2, 1, 4, 7 }; + + testing::ProcessTest("scalar * Vector", u == r); +} + +template +void TestVectorSquare() +{ + math::Vector u(5); + math::Vector v{ 1, -1, 2, -2, 3 }; + u += Square(v); + math::Vector r{ 1, 1, 4, 4, 9 }; + + testing::ProcessTest("Square(Vector)", u == r); +} + +template +void TestVectorSqrt() +{ + math::Vector u(5); + math::Vector v{ 1, 1, 4, 4, 9 }; + u += Sqrt(v); + math::Vector r{ 1, 1, 2, 2, 3 }; + + math::Vector w{ 1, 1, 4, 4, 9 }; + math::TransformUpdate(math::SquareRootTransformation, w); + + testing::ProcessTest("Sqrt(Vector)", testing::IsEqual(u.ToArray(), r.ToArray()) && testing::IsEqual(w.ToArray(), r.ToArray())); +} + +template 
<typename ElementType, math::VectorOrientation orientation>
+void TestVectorAbs()
+{
+    math::Vector<ElementType, orientation> u(5);
+    math::Vector<ElementType, orientation> v{ 1, -1, 2, -2, 3 };
+    u += Abs(v);
+    math::Vector<ElementType, orientation> r{ 1, 1, 2, 2, 3 };
+
+    math::Vector<ElementType, orientation> w{ 1, -1, 2, -2, 3 };
+    math::TransformUpdate(math::AbsoluteValueTransformation<ElementType>, w);
+
+    testing::ProcessTest("Abs(Vector)", u == r && w == r);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorPlusEqualsOperator()
+{
+    math::Vector<ElementType, orientation> v{ 1, -1, 2, -2, 3 };
+    v += -2;
+    math::Vector<ElementType, orientation> r{ -1, -3, 0, -4, 1 };
+
+    testing::ProcessTest("Vector::operator+=", v == r);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorMinusEqualsOperator()
+{
+    math::Vector<ElementType, orientation> v{ 1, -1, 2, -2, 3 };
+    v -= 2;
+    math::Vector<ElementType, orientation> r{ -1, -3, 0, -4, 1 };
+
+    testing::ProcessTest("Vector::operator-=", v == r);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorTimesEqualsOperator()
+{
+    math::Vector<ElementType, orientation> v{ 1, -1, 2, -2, 3 };
+    v *= -2;
+    math::Vector<ElementType, orientation> r{ -2, 2, -4, 4, -6 };
+
+    testing::ProcessTest("Vector::operator*=", v == r);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorDivideEqualsOperator()
+{
+    math::Vector<ElementType, orientation> v{ 1, -1, 2, -2, 3 };
+    v /= -0.5;
+    math::Vector<ElementType, orientation> r{ -2, 2, -4, 4, -6 };
+
+    testing::ProcessTest("Vector::operator/=", v == r);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorElementwiseMultiplySet()
+{
+    math::Vector<ElementType, orientation> u{ 1, 2, 3, 4, 5 };
+    math::Vector<ElementType, orientation> v{ 2, 0, -1, 0, 1 };
+    math::Vector<ElementType, orientation> w(5);
+    math::ElementwiseMultiplySet(u, v, w);
+    math::Vector<ElementType, orientation> r{ 2, 0, -3, 0, 5 };
+
+    testing::ProcessTest("ElementwiseMultiplySet(Vector, Vector)", w == r);
+}
+
+template <typename ElementType, math::VectorOrientation orientation>
+void TestVectorVectorDot()
+{
+    math::Vector<ElementType, orientation> u{ 1, 2, 3, 4, 5 };
+    math::Vector<ElementType, orientation> v{ 1, -1, 2, -2, 3 };
+    auto result = math::Dot(u, v);
+
+    testing::ProcessTest("Dot(Vector, Vector)", result == 12);
+}
+
+template <typename ElementType, math::MatrixLayout layout, math::ImplementationType implementation>
+void TestVectorVectorOuter()
+{
+    auto implementationName = math::Internal::VectorOperations<implementation>::GetImplementationName();
+
+    math::ColumnVector<ElementType> u{ 1, 2, 3 };
+    math::RowVector<ElementType> v{ 1, -1 };
+    math::Matrix<ElementType, layout> A(3, 2);
+
+    math::OuterProduct(u, v, A);
+
+    math::ColumnMatrix<ElementType> B{ { 1, -1 }, { 2, -2 }, { 3, -3 } };
+    testing::ProcessTest(implementationName + "::OuterProduct(Vector, Vector)", A == B);
+}
+
+template <typename ElementType, math::ImplementationType implementation>
+void TestVectorVectorInner()
+{
+    auto implementationName = math::Internal::VectorOperations<implementation>::GetImplementationName();
+
+    math::RowVector<ElementType> u{ 1, 2, 3, 4, 5 };
+    math::ColumnVector<ElementType> v{ 1, -1, 2, -2, 3 };
+    ElementType result;
+    math::InnerProduct(u, v, result);
+
+    testing::ProcessTest(implementationName + "::InnerProduct(Vector, Vector)", result == 12);
+}
+
+template <typename ElementType, math::VectorOrientation orientation, math::ImplementationType implementation>
+void TestVectorAddUpdateScalar()
+{
+    auto implementationName = math::Internal::VectorOperations<implementation>::GetImplementationName();
+
+    ElementType a = 3;
+    math::Vector<ElementType, orientation> u{ -2, 0, 1, 1 };
+    math::AddUpdate(a, u);
+
+    math::Vector<ElementType, orientation> w{ -2, 0, 1, 1 };
+    math::ScaleAddUpdate(a, math::OnesVector(), static_cast<ElementType>(1), w);
+
+    math::Vector<ElementType, orientation> r{ 1, 3, 4, 4 };
+    testing::ProcessTest(implementationName + "::AddUpdate(scalar, Vector)", u == r && w == r);
+}
+
+template <typename ElementType, math::VectorOrientation orientation, math::ImplementationType implementation>
+void TestVectorAddUpdateVector()
+{
+    auto implementationName = math::Internal::VectorOperations<implementation>::GetImplementationName();
+
+    math::Vector<ElementType, orientation> v{ 1, -1, 2, -2 };
+    math::Vector<ElementType, orientation> u{ -2, 0, 1, 1 };
+    math::AddUpdate(v, u);
+
+    math::Vector<ElementType, orientation> w{ -2, 0, 1, 1 };
+    math::ScaleAddUpdate(static_cast<ElementType>(1), v, static_cast<ElementType>(1), w);
+
+    math::Vector<ElementType, orientation> r{ -1, -1, 3, -1 };
+    testing::ProcessTest(implementationName + "::AddUpdate(Vector, Vector)", u == r && w == r);
+}
+
+template <typename ElementType, math::VectorOrientation orientation, math::ImplementationType implementation>
+void TestVectorAddSetScalar()
+{
+    auto implementationName = math::Internal::VectorOperations<implementation>::GetImplementationName();
+
+    ElementType a = 3.0;
+    math::Vector<ElementType, orientation> u{ -2, 0, 1, 1 };
+    math::Vector<ElementType, orientation> z(4);
+    math::AddSet(a, u, z);
+
+    math::Vector<ElementType, orientation> w(4);
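// [editor's note: illustrative aside, not part of the patch] The next call
// is the cross-check half of this test: AddSet(a, u, z) computes z = a + u
// elementwise, and the more general ScaleAddSet(a, ones, 1, u, w) must
// produce the same values, since w[i] = a * 1 + 1 * u[i]. For a = 3 and
// u = { -2, 0, 1, 1 } both sides give { 1, 3, 4, 4 }, the reference r below.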
math::ScaleAddSet(a, math::OnesVector(), static_cast(1), u, w); + + math::Vector r{ 1, 3, 4, 4 }; + testing::ProcessTest(implementationName + "::AddSet(scalar, Vector, output)", z == r && w == r); +} + +template +void TestVectorAddSetScalarZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 0.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + math::AddSet(a, u, z); + + math::Vector w(4); + math::ScaleAddSet(a, math::OnesVector(), static_cast(1), u, w); + + testing::ProcessTest(implementationName + "::AddSet(0.0, Vector, output)", z == u && w == u); +} + +template +void TestVectorAddSetScalarOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 1.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + math::AddSet(a, u, z); + + math::Vector w(4); + math::ScaleAddSet(a, math::OnesVector(), static_cast(1), u, w); + + math::Vector r{ -1, 1, 2, 2 }; + testing::ProcessTest(implementationName + "::AddSet(1.0, Vector, output)", z == r && w == r); +} + +template +void TestVectorAddSetVector() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + math::Vector v{ 1, -1, 2, -2 }; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::AddSet(v, u, z); + + math::Vector w(4); + math::ScaleAddSet(static_cast(1), v, static_cast(1), u, w); + + math::Vector r{ -1, -1, 3, -1 }; + testing::ProcessTest(implementationName + "::ScaleAddUpdate(1.0, Vector, 1.0, Vector)", z == r && w == r); +} + +template +void TestVectorScaleUpdate() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::ScaleUpdate(b, u); + + math::Vector r{ -4, 0, 2, 2 }; + testing::ProcessTest(implementationName + "::ScaleUpdate(scalar, Vector)", u == r); +} + +template +void TestVectorScaleUpdateZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType b = 0.0; + math::Vector u{ -2, 0, 1, 1 }; + math::ScaleUpdate(b, u); + + math::Vector r{ 0, 0, 0, 0 }; + testing::ProcessTest(implementationName + "::ScaleUpdate(0.0, Vector)", u == r); +} + +template +void TestVectorScaleUpdateOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType b = 1.0; + math::Vector u{ -2, 0, 1, 1 }; + math::ScaleUpdate(b, u); + + math::Vector r{ -2, 0, 1, 1 }; + testing::ProcessTest(implementationName + "::ScaleUpdate(1.0, Vector)", u == r); +} + +template +void TestVectorScaleSet() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + math::Vector v{ 1, -1, 2, -2 }; + math::Vector u{ -2, 0, 1, 1 }; + math::ScaleSet(a, v, u); + + math::Vector r{ 3, -3, 6, -6 }; + testing::ProcessTest(implementationName + "::ScaleSet(scalar, Vector, Vector)", u == r); +} + +template +void TestVectorScaleSetZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 0.0; + math::Vector v{ 1, -1, 2, -2 }; + math::Vector u{ 2, 0, 1, 1 }; + math::ScaleSet(a, v, u); + + math::Vector r{ 0, 0, 0, 0 }; + testing::ProcessTest(implementationName + "::ScaleSet(0.0, Vector, Vector)", u == r); +} + +template +void TestVectorScaleSetOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 1.0; + math::Vector v{ 1, -1, 2, -2 }; + 
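// [editor's note: illustrative aside, not part of the patch] The Zero/One
// variants of these ScaleSet/ScaleAddSet tests exist because scal/axpy-style
// kernels commonly special-case the constants: a == 0 degenerates to a fill,
// a == 1 to a copy. A hypothetical dispatching sketch of that idea (uses
// <algorithm> and <vector>):
//
//   template <typename T>
//   void ScaleSetSketch(T a, const std::vector<T>& v, std::vector<T>& out)
//   {
//       if (a == 0)      { std::fill(out.begin(), out.end(), T(0)); }
//       else if (a == 1) { std::copy(v.begin(), v.end(), out.begin()); }
//       else             { std::transform(v.begin(), v.end(), out.begin(),
//                                         [a](T x) { return a * x; }); }
//   }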
math::Vector u{ -2, 0, 1, 1 }; + math::ScaleSet(a, v, u); + + math::Vector r{ 1, -1, 2, -2 }; + testing::ProcessTest(implementationName + "::ScaleSet(1.0, Vector, Vector)", u == r); +} + +template +void TestVectorScaleAddUpdateScalarVectorOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + math::Vector v{ 1, -1, 2, -2 }; + math::Vector u{ -2, 0, 1, 1 }; + math::ScaleAddUpdate(a, v, math::One(), u); + + math::Vector w{ -2, 0, 1, 1 }; + math::ScaleAddUpdate(a, v, static_cast(1), w); + + math::Vector r{ 1, -3, 7, -5 }; + testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Vector, 1.0, Vector)", u == r && w == r); +} + +template +void TestVectorScaleAddUpdateScalarOnesScalar() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::ScaleAddUpdate(a, math::OnesVector(), b, u); + + math::Vector r{ -1, 3, 5, 5 }; + testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Ones, scalar, Vector)", u == r); +} + +template +void TestVectorScaleAddUpdateOneVectorScalar() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + math::Vector v{ 1, -1, 2, -2 }; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::ScaleAddUpdate(math::One(), v, b, u); + + math::Vector w{ -2, 0, 1, 1 }; + math::ScaleAddUpdate(static_cast(1), v, b, w); + + math::Vector r{ -3, -1, 4, 0 }; + testing::ProcessTest(implementationName + "::ScaleAddUpdate(1.0, Vector, scalar, Vector)", u == r && w == r); +} + +template +void TestVectorScaleAddUpdateScalarVectorScalar() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + math::Vector v{ 1, -1, 2, -2 }; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + + math::ScaleAddUpdate(a, v, b, u); + + math::Vector r{ -1, -3, 8, -4 }; + testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Vector, scalar, Vector)", u == r); +} + +template +void TestVectorScaleAddSetOnes() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ -1, 3, 5, 5 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, ones, scalar, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetOnesScalarZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + ElementType b = 0.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ 3, 3, 3, 3 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, ones, 0.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetOnesScalarOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + ElementType b = 1.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ 1, 3, 4, 4 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, ones, 1.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetOnesZeroScalar() +{ + auto implementationName = 
math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 0.0; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ -4, 0, 2, 2 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, ones, scalar, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetOnesOneScalar() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 1.0; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ -3, 1, 3, 3 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, ones, scalar, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetOnesZeroOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 0.0; + ElementType b = 1.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ -2, 0, 1, 1 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, ones, 1.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetOnesOneZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 1.0; + ElementType b = 0.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ 1, 1, 1, 1 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, ones, 0.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetOnesOneOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 1.0; + ElementType b = 1.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ -1, 1, 2, 2 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, ones, 1.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetOnesZeroZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 0.0; + ElementType b = 0.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, math::OnesVector(), b, u, z); + + math::Vector r{ 0, 0, 0, 0 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, ones, 0.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVector() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ -1, -3, 5, -1 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Vector, scalar, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVectorScalarZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 0.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ 3, -3, 3, -3 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Vector, 0.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVectorScalarOne() +{ + 
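// [editor's note: illustrative aside, not part of the patch] Together, the
// ScaleAddSet tests enumerate the whole (a, b) grid over {0, 1, scalar}:
// every special case must agree with the one general elementwise formula
//
//   z[i] = a * v[i] + b * u[i]
//
// For this test (a = 3, v = { 1, -1, 1, -1 }, b = 1, u = { -2, 0, 1, 1 }):
// z[0] = 3 * 1 + 1 * (-2) = 1, matching the first entry of r below.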
auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 3.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 1.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ 1, -3, 4, -2 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Vector, 1.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVectorZeroScalar() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 0.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ -4, 0, 2, 2 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, Vector, scalar, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVectorOneScalar() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 1.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 2.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ -3, -1, 3, 1 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, Vector, scalar, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVectorZeroOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 0.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 1.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ -2, 0, 1, 1 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, Vector, 1.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVectorOneZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 1.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 0.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ 1, -1, 1, -1 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, Vector, 0.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVectorOneOne() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 1.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 1.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ -1, -1, 2, 0 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, Vector, 1.0, Vector, output)", z == r); +} + +template +void TestVectorScaleAddSetVectorZeroZero() +{ + auto implementationName = math::Internal::VectorOperations::GetImplementationName(); + + ElementType a = 0.0; + math::Vector v{ 1, -1, 1, -1 }; + ElementType b = 0.0; + math::Vector u{ -2, 0, 1, 1 }; + math::Vector z(4); + + math::ScaleAddSet(a, v, b, u, z); + + math::Vector r{ 0, 0, 0, 0 }; + testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, Vector, 0.0, Vector, output)", z == r); +} + +template +void TestVectorCumulativeSumUpdate() +{ + math::Vector v{ 1, -1, 3, 2 }; + math::CumulativeSumUpdate(v); + math::Vector r{ 1, 0, 3, 5 }; + testing::ProcessTest("CumulativeSumUpdate(Vector)", v == r); +} + +template +void TestVectorConsecutiveDifferenceUpdate() +{ + math::Vector v{ 1, -1, 3, 2 }; + math::ConsecutiveDifferenceUpdate(v); + math::Vector r{ 1, 
-2, 4, -1 };
+    testing::ProcessTest("ConsecutiveDifferenceUpdate(Vector)", v == r);
+}
+
+template <typename ElementType>
+void TestVectorArchiver()
+{
+    math::Vector<ElementType, math::VectorOrientation::column> V{ 1, 2, 3, 4, 5, 5, 4, 3, 2, 1 };
+
+    utilities::SerializationContext context;
+    std::stringstream strstream;
+    utilities::JsonArchiver archiver(strstream);
+
+    math::VectorArchiver::Write(V, "test", archiver);
+    utilities::JsonUnarchiver unarchiver(strstream, context);
+
+    math::Vector<ElementType, math::VectorOrientation::column> Va(0);
+    math::VectorArchiver::Read(Va, "test", unarchiver);
+
+    testing::ProcessTest("VectorArchiver", Va == V);
+}
+
+#pragma endregion implementation
diff --git a/libraries/math/test/include/math_profile.h b/libraries/math/test/include/math_profile.h
index 5f17e4ed1..39d08550a 100644
--- a/libraries/math/test/include/math_profile.h
+++ b/libraries/math/test/include/math_profile.h
@@ -30,4 +30,200 @@ void ProfileMatrixVectorMultiplyScaleAddUpdate(size_t numRows, size_t numColumns
 template <typename ElementType>
 void ProfileMatrixMatrixMultiplyScaleAddUpdate(size_t numRows, size_t numColumns, size_t numColumns2, size_t repetitions, std::string seed = "123ABC");
 
-#include "../tcc/math_profile.tcc"
+#pragma region implementation
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+
+using namespace ell;
+
+template <typename Function>
+double GetTime(Function function, size_t repetitions)
+{
+    // warm up
+    function();
+    function();
+    function();
+
+    // timed reps
+    auto start = std::chrono::high_resolution_clock::now();
+    for (size_t t = 0; t < repetitions; ++t)
+    {
+        function();
+    }
+    auto finish = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(finish - start).count();
+    return static_cast<double>(duration);
+}
+
+void PrintLine(std::string functionName, double native, double singleBlas, double multiBlas)
+{
+    std::cout << functionName
+              << "\tnative:1.0\tsingleBlas:" << singleBlas / native
+              << "\tmultiBlas:" << multiBlas / native
+              << std::endl;
+}
+
+template <typename ElementType, typename VectorAType>
+void ProfileVectorScaleAddWorker(ElementType scalarA, VectorAType vectorA, ElementType scalarB, math::VectorReference<ElementType, math::VectorOrientation::row> vectorB, std::string description, size_t repetitions)
+{
+    double native = GetTime([&]() { math::ScaleAddUpdate<math::ImplementationType::native>(scalarA, vectorA, scalarB, vectorB); }, repetitions);
+    math::Blas::SetNumThreads(0);
+    double multiBlas = GetTime([&]() { math::ScaleAddUpdate<math::ImplementationType::openBlas>(scalarA, vectorA, scalarB, vectorB); }, repetitions);
+    math::Blas::SetNumThreads(1);
+    double singleBlas = GetTime([&]() { math::ScaleAddUpdate<math::ImplementationType::openBlas>(scalarA, vectorA, scalarB, vectorB); }, repetitions);
+
+    std::string type = std::string("<") + typeid(ElementType).name() + ">";
+    PrintLine("ScaleAddUpdate" + type + "(" + description + ", vector)", native, singleBlas, multiBlas);
+}
+
+template <typename ElementType>
+void ProfileVectorScaleAdd(size_t size, size_t repetitions, std::string seed)
+{
+    auto engine = utilities::GetRandomEngine(seed);
+    std::uniform_real_distribution<ElementType> uniform(-1, 1);
+    auto generator = [&]() { return uniform(engine); };
+
+    math::RowVector<ElementType> v(size);
+    v.Generate(generator);
+
+    math::RowVector<ElementType> u(size);
+    u.Generate(generator);
+
+    ElementType scalar = static_cast<ElementType>(-7.3);
+    ElementType one = 1.0;
+
+    ProfileVectorScaleAddWorker(scalar, math::OnesVector(), one, u, "scalar, ones, one", repetitions);
+    ProfileVectorScaleAddWorker(one, v, one, u, "one, vector, one", repetitions);
+    ProfileVectorScaleAddWorker(scalar, v, one, u, "scalar, vector, one", repetitions);
+    ProfileVectorScaleAddWorker(scalar, math::OnesVector(), scalar, u, "scalar, ones, scalar", repetitions);
+    ProfileVectorScaleAddWorker(one, v, scalar, u, "one, vector, 
scalar", repetitions); + ProfileVectorScaleAddWorker(scalar, v, scalar, u, "scalar, vector, scalar", repetitions); +} + +template +void ProfileVectorInner(size_t size, size_t repetitions, std::string seed) +{ + auto engine = utilities::GetRandomEngine(seed); + std::uniform_real_distribution uniform(-1, 1); + auto generator = [&]() { return uniform(engine); }; + + math::RowVector u(size); + u.Generate(generator); + + math::ColumnVector v(size); + v.Generate(generator); + + ElementType result; + double native = GetTime([&]() { math::Internal::VectorOperations::InnerProduct(u, v, result); }, repetitions); + math::Blas::SetNumThreads(1); + double singleBlas = GetTime([&]() { math::Internal::VectorOperations::InnerProduct(u, v, result); }, repetitions); + math::Blas::SetNumThreads(0); + double multiBlas = GetTime([&]() { math::Internal::VectorOperations::InnerProduct(u, v, result); }, repetitions); + + std::string type = std::string("<") + typeid(ElementType).name() + ">"; + std::string vector = "Vector" + type + "[" + std::to_string(size) + "]"; + PrintLine("Dot(" + vector + ", " + vector + ")", native, singleBlas, multiBlas); +} + +template +void ProfileVectorOuter(size_t size, size_t repetitions, std::string seed) +{ + auto engine = utilities::GetRandomEngine(seed); + std::uniform_real_distribution uniform(-1, 1); + auto generator = [&]() { return uniform(engine); }; + + math::ColumnVector u(size); + u.Generate(generator); + + math::RowVector v(size); + v.Generate(generator); + + math::Matrix S(size, size); + + double native = GetTime([&]() { math::Internal::VectorOperations::OuterProduct(u, v, S); }, repetitions); + math::Blas::SetNumThreads(1); + double singleBlas = GetTime([&]() { math::Internal::VectorOperations::OuterProduct(u, v, S); }, repetitions); + math::Blas::SetNumThreads(0); + double multiBlas = GetTime([&]() { math::Internal::VectorOperations::OuterProduct(u, v, S); }, repetitions); + + std::string type = std::string("<") + typeid(ElementType).name() + ">"; + std::string vector = "Vector" + type + "[" + std::to_string(size) + "]"; + std::string functionName = "OuterProduct(" + vector + ", " + vector + ")"; + PrintLine(functionName, native, singleBlas, multiBlas); +} + +template +void ProfileMatrixVectorMultiplyScaleAddUpdate(size_t numRows, size_t numColumns, size_t repetitions, std::string seed) +{ + auto engine = utilities::GetRandomEngine(seed); + std::uniform_real_distribution uniform(-1, 1); + auto generator = [&]() { return uniform(engine); }; + + math::Matrix M(numRows, numColumns); + M.Generate(generator); + + math::ColumnVector v(numColumns); + v.Generate(generator); + + math::ColumnVector u(numRows); + u.Generate(generator); + + auto s = generator(); + auto t = generator(); + + double native = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(s, M, v, t, u); }, repetitions); + math::Blas::SetNumThreads(1); + double singleBlas = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(s, M, v, t, u); }, repetitions); + math::Blas::SetNumThreads(0); + double multiBlas = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(s, M, v, t, u); }, repetitions); + + std::string type = std::string("<") + typeid(ElementType).name() + ">"; + std::string vector1 = "Vector" + type + "[" + std::to_string(numColumns) + "]"; + std::string vector2 = "Vector" + type + "[" + std::to_string(numRows) + "]"; + std::string matrix = "Matrix" + type + "[" + std::to_string(numRows) + ", " + std::to_string(numColumns) + "]"; + std::string 
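// [editor's note: illustrative aside, not part of the patch] Every profiler
// in this header follows the same recipe: GetTime runs the function three
// times untimed to warm caches, then times `repetitions` calls with
// std::chrono::high_resolution_clock; each kernel is measured natively, with
// BLAS pinned to one thread (Blas::SetNumThreads(1)), and with BLAS choosing
// its own thread count (Blas::SetNumThreads(0)). PrintLine then reports the
// two BLAS times as ratios of the native time. A typical call, assuming the
// declarations above supply the default "123ABC" seed:
//
//   ProfileVectorInner<float>(10000, 100);
//   ProfileVectorInner<double>(10000, 100, "seed42");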
functionName = "MultiplyScaleAddUpdate(scalar, " + matrix + ", " + vector1 + ", scalar, " + vector2 + ")"; + PrintLine(functionName, native, singleBlas, multiBlas); +} + +template +void ProfileMatrixMatrixMultiplyScaleAddUpdate(size_t numRows, size_t numColumns, size_t numColumns2, size_t repetitions, std::string seed) +{ + auto engine = utilities::GetRandomEngine(seed); + std::uniform_real_distribution uniform(-1, 1); + auto generator = [&]() { return uniform(engine); }; + + math::Matrix M(numRows, numColumns); + M.Generate(generator); + + math::Matrix N(numColumns, numColumns2); + N.Generate(generator); + + math::Matrix T(numRows, numColumns2); + T.Generate(generator); + + auto a = generator(); + auto b = generator(); + + double native = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(a, M, N, b, T); }, repetitions); + math::Blas::SetNumThreads(1); + double singleBlas = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(a, M, N, b, T); }, repetitions); + math::Blas::SetNumThreads(0); + double multiBlas = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(a, M, N, b, T); }, repetitions); + + std::string type = std::string("<") + typeid(ElementType).name() + ">"; + std::string matrix1 = "Matrix" + type + "[" + std::to_string(numRows) + ", " + std::to_string(numColumns) + "]"; + std::string matrix2 = "Matrix" + type + "[" + std::to_string(numColumns) + ", " + std::to_string(numColumns2) + "]"; + std::string matrix3 = "Matrix" + type + "[" + std::to_string(numRows) + ", " + std::to_string(numColumns2) + "]"; + std::string functionName = "MultiplyScaleAddUpdate(scalar, " + matrix1 + ", " + matrix2 + ", scalar, " + matrix3 + ")"; + PrintLine(functionName, native, singleBlas, multiBlas); +} + +#pragma endregion implementation diff --git a/libraries/math/test/tcc/Matrix_test.tcc b/libraries/math/test/tcc/Matrix_test.tcc deleted file mode 100644 index 8a528c93e..000000000 --- a/libraries/math/test/tcc/Matrix_test.tcc +++ /dev/null @@ -1,1436 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Matrix_test.tcc (math_test) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -void TestMatrixNumRows() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - auto N = M.GetSubMatrix(0, 1, 2, 2); - - testing::ProcessTest("Matrix::Operator", M.NumRows() == 3 && N.NumRows() == 2); -} - -template -void TestMatrixNumColumns() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - auto N = M.GetSubMatrix(0, 1, 2, 2); - - testing::ProcessTest("Matrix::NumColumns", M.NumColumns() == 4 && N.NumColumns() == 2); -} - -template -void TestMatrixSize() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - auto N = M.GetSubMatrix(0, 1, 2, 2); - - testing::ProcessTest("Matrix::Size", M.Size() == 12 && N.Size() == 4); -} - -template -void TestMatrixGetIncrement() -{ - math::ColumnMatrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - math::RowMatrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - testing::ProcessTest("Matrix::GetIncrement", M.GetIncrement() == 3 && N.GetIncrement() == 4); -} - -template -void TestMatrixGetMinorSize() -{ - math::ColumnMatrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - 
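// [editor's note: illustrative aside, not part of the patch]
// ProfileMatrixMatrixMultiplyScaleAddUpdate above times the GEMM-shaped
// update T = a * (M * N) + b * T, with M of size numRows x numColumns, N of
// numColumns x numColumns2, and T of numRows x numColumns2. A hypothetical
// square sweep:
//
//   for (size_t n : { 64, 128, 256, 512 })
//   {
//       ProfileMatrixMatrixMultiplyScaleAddUpdate<float>(n, n, n, 10);
//   }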
math::RowMatrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - testing::ProcessTest("Matrix::GetMinorSize", M.GetMinorSize() == 4 && N.GetMinorSize() == 3); -} - -template -void TestMatrixGetMajorSize() -{ - math::ColumnMatrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - math::RowMatrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - testing::ProcessTest("Matrix::GetMajorSize", M.GetMajorSize() == 3 && N.GetMajorSize() == 4); -} - -template -void TestMatrixGetRowIncrement() -{ - math::ColumnMatrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - math::RowMatrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - testing::ProcessTest("Matrix::GetRowIncrement", M.GetRowIncrement() == 1 && N.GetRowIncrement() == 4); -} - -template -void TestMatrixGetColumnIncrement() -{ - math::ColumnMatrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - math::RowMatrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - testing::ProcessTest("Matrix::GetColumnIncrement", M.GetColumnIncrement() == 3 && N.GetColumnIncrement() == 1); -} - -template -void TestMatrixIndexer() -{ - math::Matrix M(3, 4); - M(0, 0) = 1; - M(0, 2) = 4; - M(2, 3) = 7; - - auto N = M.GetSubMatrix(0, 1, 2, 2); - N(1, 0) = 3; - - math::ColumnMatrix R{ - { 1, 0, 4, 0 }, - { 0, 3, 0, 0 }, - { 0, 0, 0, 7 } - }; - - testing::ProcessTest("Matrix::Operator()", M == R); -} - -template -void TestMatrixGetDataPointer() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - auto S = M.GetSubMatrix(1, 1, 2, 2); - - testing::ProcessTest("Matrix::GetDataPointer", M.GetDataPointer() == &(M(0, 0)) && S.GetDataPointer() == &(M(1, 1)) && M.GetConstDataPointer() == &(M(0, 0)) && S.GetConstDataPointer() == &(M(1, 1))); -} - -template -void TestMatrixGetLayout() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - auto N = M.GetSubMatrix(0, 1, 2, 2); - - testing::ProcessTest("Matrix::GetLayout", M.GetLayout() == layout && N.GetLayout() == layout); -} - -template -void TestMatrixIsContiguous() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - auto S = M.GetSubMatrix(1, 1, 1, 2); - - testing::ProcessTest("Matrix::IsContiguous", M.IsContiguous() == true && S.IsContiguous() == false); -} - -template -void TestMatrixToArray() -{ - math::ColumnMatrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::RowMatrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - auto Sm = M.GetSubMatrix(0, 1, 2, 2); - auto Sn = N.GetSubMatrix(0, 1, 2, 2); - - std::vector v{ 1, 0, 0, 0, 4, 0, 0, 7 }; - std::vector u{ 0, 0, 4, 0 }; - std::vector x{ 1, 0, 4, 0, 0, 0, 0, 7 }; - std::vector y{ 0, 4, 0, 0 }; - - testing::ProcessTest("Matrix::ToArray", M.ToArray() == v && Sm.ToArray() == u && N.ToArray() == x && Sn.ToArray() == y); -} - -template -void TestMatrixSwap() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix N{ - { 1, 3 }, - { 0, 3 } - }; - - math::Matrix S{ - { 1, 3 }, - { 0, 3 } - }; - - M.Swap(N); - - testing::ProcessTest("Matrix::Swap", M == S); -} - -template -void TestMatrixIsEqual() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix::value> S{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - auto A = M.GetSubMatrix(0, 1, 2, 2); - - math::RowMatrix T{ - { 0, 4 }, - { 0, 0 } - }; - - testing::ProcessTest("Matrix::IsEqual", M.IsEqual(N) && M.IsEqual(S) && A.IsEqual(T) && T.IsEqual(A)); 
-} - -template -void TestMatrixEqualityOperator() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix::value> S{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - auto A = M.GetSubMatrix(0, 1, 2, 2); - - math::RowMatrix T{ - { 0, 4 }, - { 0, 0 } - }; - - testing::ProcessTest("Matrix::operator==", M == N && M == S && A == T && T == A); -} - -template -void TestMatrixInequalityOperator() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix N{ - { 1, 0, 4 }, - { 0, 0, 0 } - }; - - math::Matrix S{ - { 1, 3, 4, 0 }, - { 0, 0, 0, 7 } - }; - - auto A = M.GetSubMatrix(0, 1, 2, 2); - auto B = M.GetSubMatrix(0, 2, 2, 2); - - testing::ProcessTest("Matrix::operator!=", M != N && M != S && A != B); -} - -template -void TestMatrixGetConstReference() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - auto N = M.GetConstReference(); - auto A = M.GetSubMatrix(0, 1, 2, 2); - auto B = A.GetConstReference(); - - testing::ProcessTest("Matrix::GetConstReference", M == N && A == B); -} - -template -void TestMatrixGetSubMatrix() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - auto N = M.GetSubMatrix(1, 1, 2, 3); - N.Fill(3); - N(0, 1) = 4; - - auto S = N.GetSubMatrix(0, 1, 2, 2); - - math::RowMatrix R{ - { 1, 0, 4, 0 }, - { 0, 3, 4, 3 }, - { 0, 3, 3, 3 } - }; - - math::RowMatrix A{ - { 4, 3 }, - { 3, 3 } - }; - - testing::ProcessTest("Matrix::GetSubMatrix", M == R && S == A); -} - -template -void TestMatrixGetColumn() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 3, 4, 3 }, - { 0, 3, 5, 6 } - }; - - auto u = M.GetColumn(1); - u[0] = 2; - u[1] = 2; - u[2] = 8; - - auto N = M.GetSubMatrix(1, 1, 2, 3); - auto v = N.GetColumn(1); - - math::RowMatrix R{ - { 1, 2, 4, 0 }, - { 0, 2, 4, 3 }, - { 0, 8, 5, 6 } - }; - - math::ColumnVector w{ 4, 5 }; - - testing::ProcessTest("Matrix::GetColumn", M == R && v == w); -} - -template -void TestMatrixGetRow() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 3, 4, 3 }, - { 0, 3, 5, 6 } - }; - - auto u = M.GetRow(1); - u[0] = 2; - u[1] = 2; - u[3] = 8; - - auto N = M.GetSubMatrix(1, 1, 2, 3); - auto v = N.GetRow(1); - - math::RowMatrix R{ - { 1, 0, 4, 0 }, - { 2, 2, 4, 8 }, - { 0, 3, 5, 6 } - }; - - math::RowVector w{ 3, 5, 6 }; - - testing::ProcessTest("Matrix::GetRow", M == R && w == v); -} - -template -void TestMatrixGetDiagonal() -{ - math::Matrix M{ - { 1, 2, 4, 0 }, - { 0, 2, 4, 3 }, - { 0, 8, 5, 6 } - }; - - M.GetDiagonal().Fill(9); - - auto N = M.GetSubMatrix(1, 1, 2, 3); - auto v = N.GetDiagonal(); - - math::RowMatrix R{ - { 9, 2, 4, 0 }, - { 0, 9, 4, 3 }, - { 0, 8, 9, 6 } - }; - - math::ColumnVector u{ 9, 9 }; - - testing::ProcessTest("Matrix::GetDiagonal", M == R && u == v); -} - -template -void TestMatrixGetMajorVector() -{ - math::ColumnMatrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::RowMatrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - M.GetMajorVector(1).Fill(8); - N.GetMajorVector(1).Fill(8); - - math::RowMatrix R1{ - { 1, 8, 4, 0 }, - { 0, 8, 0, 7 } - }; - - math::RowMatrix R2{ - { 1, 0, 4, 0 }, - { 8, 8, 8, 8 } - }; - - testing::ProcessTest("Matrix::GetMajorVector", M == R1 && N == R2); -} - -template -void TestMatrixTranspose() -{ - math::Matrix M{ - { 9, 2, 4, 0 }, - { 0, 9, 4, 3 }, - { 0, 8, 9, 6 } - }; - - auto T = M.Transpose(); - auto N = M.GetSubMatrix(1, 1, 2, 2).Transpose(); - - math::RowMatrix R{ - { 9, 0, 0 }, - { 2, 9, 8 }, - { 4, 4, 9 }, - { 0, 3, 6 } - }; - - math::RowMatrix S{ - { 9, 8 
}, - { 4, 9 } - }; - - testing::ProcessTest("Matrix::Transpose", T == R && N == S); -} - -template -void TestMatrixCopyFrom() -{ - - math::Matrix M(2, 4); - - math::Matrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix S{ - { 2, 6 }, - { 3, 9 } - }; - - M.CopyFrom(N); - M.GetSubMatrix(0, 2, 2, 2).CopyFrom(S); - - math::RowMatrix R{ - { 1, 0, 2, 6 }, - { 0, 0, 3, 9 } - }; - - testing::ProcessTest("Matrix::CopyFrom", M == R); -} - -template -void TestMatrixReset() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - M.Reset(); - N.GetSubMatrix(0, 1, 2, 2).Reset(); - - math::RowMatrix R(2, 4); - - math::RowMatrix T{ - { 1, 0, 0, 0 }, - { 0, 0, 0, 7 } - }; - - testing::ProcessTest("Matrix::Reset", M == R && N == T); -} - -template -void TestMatrixFill() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - M.Fill(-2); - N.GetSubMatrix(0, 1, 2, 2).Fill(-2); - - math::RowMatrix R{ - { -2, -2, -2, -2 }, - { -2, -2, -2, -2 } - }; - - math::RowMatrix T{ - { 1, -2, -2, 0 }, - { 0, -2, -2, 7 } - }; - - testing::ProcessTest("Matrix::Fill", M == R && N == T); -} - -template -void TestMatrixGenerate() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - M.Generate([]() -> ElementType { return -2; }); - N.GetSubMatrix(0, 1, 2, 2).Generate([]() -> ElementType { return -2; }); - - math::RowMatrix R{ - { -2, -2, -2, -2 }, - { -2, -2, -2, -2 } - }; - - math::RowMatrix T{ - { 1, -2, -2, 0 }, - { 0, -2, -2, 7 } - }; - - testing::ProcessTest("Matrix::Generate", M == R && N == T); -} - -template -void TestMatrixTransform() -{ - math::Matrix M{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - math::Matrix N{ - { 1, 0, 4, 0 }, - { 0, 0, 0, 7 } - }; - - M.Transform([](ElementType x) { return 2 * x; }); - N.GetSubMatrix(0, 1, 2, 2).Transform([](ElementType x) { return 2 * x; }); - - math::RowMatrix R{ - { 2, 0, 8, 0 }, - { 0, 0, 0, 14 } - }; - - math::RowMatrix T{ - { 1, 0, 8, 0 }, - { 0, 0, 0, 7 } - }; - - testing::ProcessTest("Matrix::Transform", M == R && N == T); -} - -template -void TestMatrixCopyCtor() -{ - math::Matrix M1{ - { 1, 2, 3, 4, 5, 6, 7, 8, 9 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9 } - }; - - math::Matrix M2(M1); - - testing::ProcessTest("Matrix(Matrix)", M1 == M2); -} - -template -void TestMatrixPrint() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - std::stringstream stream1; - math::Print(M, stream1); - auto x = stream1.str(); - - std::stringstream stream2; - math::Print(M.GetSubMatrix(0, 1, 2, 2), stream2); - auto y = stream2.str(); - - testing::ProcessTest("Print(Matrix)", x == "{ { 1, 2, 0 },\n { 0, 3, 7 } }\n" && y == "{ { 2, 0 },\n { 3, 7 } }\n"); -} - -template -void TestMatrixPlusEqualsOperatorScalar() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - M += -2; - M.GetSubMatrix(0, 1, 2, 2) += 1; - - math::RowMatrix R{ - { -1, 1, -1 }, - { -2, 2, 6 } - }; - - testing::ProcessTest("Matrix::operator+=(scalar)", M == R); -} - -template -void TestMatrixPlusEqualsOperatorMatrix() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix N{ - { 4, 3, 2 }, - { 1, 2, 1 } - }; - - M += N; - M.GetSubMatrix(0, 1, 2, 2) += N.GetSubMatrix(0, 0, 2, 2); - - math::RowMatrix R{ - { 5, 9, 5 }, - { 1, 6, 10 } - }; - - testing::ProcessTest("Matrix::operator+=(Matrix)", M == R); -} - -template -void 
TestMatrixMinusEqualsOperatorScalar() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - M -= 2; - M.GetSubMatrix(0, 1, 2, 2) -= (-1); - - math::RowMatrix R{ - { -1, 1, -1 }, - { -2, 2, 6 } - }; - - testing::ProcessTest("Matrix::operator-=(scalar)", M == R); -} - -template -void TestMatrixMinusEqualsOperatorMatrix() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix N{ - { -4, -3, -2 }, - { -1, -2, -1 } - }; - - M -= N; - M.GetSubMatrix(0, 1, 2, 2) -= N.GetSubMatrix(0, 0, 2, 2); - - math::RowMatrix R{ - { 5, 9, 5 }, - { 1, 6, 10 } - }; - testing::ProcessTest("Matrix::operator-=(Matrix)", M == R); -} - -template -void TestMatrixTimesEqualsOperator() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - M *= -1; - M.GetSubMatrix(0, 1, 2, 2) *= 2; - - math::RowMatrix R{ - { -1, -4, 0 }, - { 0, -6, -14 } - }; - - testing::ProcessTest("Matrix::operator*=", M == R); -} - -template -void TestMatrixDivideEqualsOperator() -{ - math::Matrix M{ - { 2, 4, 0 }, - { 0, 6, -8 } - }; - - M /= -2; - M.GetSubMatrix(0, 1, 2, 2) /= 0.5; - - math::RowMatrix R{ - { -1, -4, 0 }, - { 0, -6, 8 } - }; - - testing::ProcessTest("Matrix::operator/=", M == R); -} - -template -void TestMatrixAddUpdateScalar() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::AddUpdate(static_cast(-2), M); - math::AddUpdate(static_cast(1), M.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { -1, 1, -1 }, - { -2, 2, 6 } - }; - - testing::ProcessTest(implementationName + "::AddUpdate(scalar, Matrix)", M == R); -} - -template -void TestMatrixAddUpdateZero() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::AddUpdate(static_cast(0), M); - math::AddUpdate(static_cast(0), M.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - testing::ProcessTest(implementationName + "::AddUpdate(0, Matrix)", M == R); -} - -template -void TestMatrixAddUpdateMatrix() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix A{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix B{ - { 4, 3, 2 }, - { 1, 2, 1 } - }; - - math::AddUpdate(A, B); - math::AddUpdate(A.GetSubMatrix(0, 1, 2, 2), B.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { 5, 7, 2 }, - { 1, 8, 15 } - }; - - testing::ProcessTest(implementationName + "::AddUpdate(Matrix, Matrix)", B == R); -} - -template -void TestMatrixAddSetScalar() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - math::Matrix N(2, 3); - - math::AddSet(static_cast(-2), M, N); - math::AddSet(static_cast(1), M.GetSubMatrix(0, 1, 2, 2), N.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { -1, 3, 1 }, - { -2, 4, 8 } - }; - - testing::ProcessTest(implementationName + "::AddSet(scalar, Matrix, Matrix)", N == R); -} - -template -void TestMatrixAddSetZero() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - math::Matrix N(2, 3); - - math::AddSet(static_cast(0), M, N); - math::AddSet(static_cast(0), M.GetSubMatrix(0, 1, 2, 2), N.GetSubMatrix(0, 1, 2, 2)); - - testing::ProcessTest(implementationName + "::AddSet(0, Matrix, Matrix)", M == N); -} - -template -void TestMatrixAddSetMatrix() -{ - auto implementationName = 
math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix A{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix B{ - { 1, 2, 1 }, - { 0, 1, 6 } - }; - - math::Matrix N(2, 3); - - math::AddSet(A, B, N); - math::AddSet(A.GetSubMatrix(0, 1, 2, 2), B.GetSubMatrix(0, 1, 2, 2), N.GetSubMatrix(0, 1, 2, 2)); - - math::Matrix R{ - { 2, 4, 1 }, - { 0, 4, 13 } - }; - - testing::ProcessTest(implementationName + "::AddSet(Matrix, Matrix, Matrix)", N == R); -} - -template -void TestMatrixScaleUpdate() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::ScaleUpdate(static_cast(-1), M); - math::ScaleUpdate(static_cast(2), M.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { -1, -4, 0 }, - { 0, -6, -14 } - }; - - testing::ProcessTest(implementationName + "::ScaleUpdate(scalar, Matrix)", M == R); -} - -template -void TestMatrixScaleSet() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix N(2, 3); - - math::ScaleSet(static_cast(0), M, N); - math::ScaleSet(static_cast(-1), M, N); - math::ScaleSet(static_cast(2), M.GetSubMatrix(0, 1, 2, 2), N.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { -1, 4, 0 }, - { 0, 6, 14 } - }; - - testing::ProcessTest(implementationName + "::ScaleSet(scalar, Matrix, Matrix)", N == R); -} - -template -void TestMatrixScaleAddUpdateScalarMatrixOne() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { -1, 0, 0 }, - { -1, 1, 3 } - }; - - math::Matrix N{ - { 1, 2, 1 }, - { 0, -3, 4 } - }; - - math::ScaleAddUpdate(static_cast(0), M, math::One(), N); - math::ScaleAddUpdate(static_cast(1), M, math::One(), N); - math::ScaleAddUpdate(static_cast(-2), M.GetSubMatrix(0, 1, 2, 2), math::One(), N.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { 0, 2, 1 }, - { -1, -4, 1 } - }; - - testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Matrix, one, Matrix)", N == R); -} - -template -void TestMatrixScaleAddUpdateScalarOnesMatrix() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::ScaleAddUpdate(static_cast(0), math::OnesMatrix(), static_cast(1), M); - math::ScaleAddUpdate(static_cast(-1), math::OnesMatrix(), static_cast(2), M); - math::ScaleAddUpdate(static_cast(1), math::OnesMatrix(), static_cast(-1), M.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { 1, -2, 2 }, - { -1, -4, -12 } - }; - - testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, ones, scalar, Matrix)", M == R); -} - -template -void TestMatrixScaleAddUpdateOneMatrixScalar() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, -1 }, - { -1, 3, 7 } - }; - - math::Matrix N{ - { 1, 0, 0 }, - { 0, 3, 7 } - }; - - math::ScaleAddUpdate(math::One(), M, static_cast(0), N); - math::ScaleAddUpdate(math::One(), M, static_cast(-1), N); - math::ScaleAddUpdate(math::One(), M.GetSubMatrix(0, 1, 2, 2), static_cast(-1), N.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { 0, 2, -1 }, - { 0, 3, 7 } - }; - - testing::ProcessTest(implementationName + "::ScaleAddUpdate(one, Matrix, scale, Matrix)", N == R); -} - -template -void TestMatrixScaleAddUpdateScalarMatrixScalar() -{ - auto implementationName = 
math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, -2, 0 }, - { 0, 3, 2 } - }; - - math::Matrix N{ - { -1, 2, 0 }, - { 0, -3, 7 } - }; - - math::ScaleAddUpdate(static_cast(0), M, static_cast(1), N); - math::ScaleAddUpdate(static_cast(1), M, static_cast(-1), N); - math::ScaleAddUpdate(static_cast(2), M.GetSubMatrix(0, 1, 2, 2), static_cast(2), N.GetSubMatrix(0, 1, 2, 2)); - - math::RowMatrix R{ - { 2, -12, 0 }, - { 0, 18, -6 } - }; - - testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Matrix, scalar, Matrix)", N == R); -} - -template -void TestMatrixScaleAddSetScalarMatrixOne() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix N{ - { -1, 1, 3 }, - { 1, 1, 2 } - }; - - math::Matrix O(2, 3); - - math::ScaleAddSet(static_cast(-1), M, math::One(), N, O); - - math::RowMatrix R{ - { -2, -1, 3 }, - { 1, -2, -5 } - }; - - testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Matrix, one, Matrix, Matrix, Matrix)", O == R); -} - -template -void TestMatrixScaleAddSetOneMatrixScalar() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix N{ - { -2, 0, 1 }, - { 2, 1, 0 } - }; - - math::Matrix O(2, 3); - - math::ScaleAddSet(math::One(), M, static_cast(-1), N, O); - - math::RowMatrix R{ - { 3, 2, -1 }, - { -2, 2, 7 } - }; - - testing::ProcessTest(implementationName + "::ScaleAddSet(one, Matrix, scalar, Matrix, Matrix, Matrix)", O == R); -} - -template -void TestMatrixScaleAddSetScalarMatrixScalar() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix N{ - { 1, -1, 2 }, - { 2, -1, 0 } - }; - - math::Matrix O(2, 3); - - math::ScaleAddSet(static_cast(2), M, static_cast(-1), N, O); - - math::RowMatrix R{ - { 1, 5, -2 }, - { -2, 7, 14 } - }; - - testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Matrix, scalar, Matrix, Matrix, Matrix)", O == R); -} - -template -void TestMatrixRowwiseSum() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::ColumnVector v(2); - math::RowwiseSum(M, v); - - math::ColumnVector u{ 3, 10 }; - - testing::ProcessTest("ColumnwiseSum(Matrix, Vector)", v == u); -} - -template -void TestMatrixColumnwiseSum() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::RowVector v(3); - math::ColumnwiseSum(M, v); - - math::RowVector u{ 1, 5, 7 }; - - testing::ProcessTest("ColumnwiseSum(Matrix, Vector)", v == u); -} - -template -void TestMatrixVectorMultiplyScaleAddUpdate() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 0 }, - { 0, 1 }, - { 2, 2 } - }; - - math::Matrix N{ - { 1, 0, 3, 1 }, - { 0, 1, 0, -1 }, - { 2, 0, 1, 3 }, - { 2, 2, 2, 3 } - }; - - math::ColumnVector u{ 1, 1, 0 }; - math::ColumnVector w{ 1, 1, 0 }; - math::ColumnVector v{ 3, 4 }; - - ElementType s = 2; - ElementType t = 3; - - math::MultiplyScaleAddUpdate(s, M, v, t, u); - math::MultiplyScaleAddUpdate(s, N.GetSubMatrix(1, 1, 3, 2), v, t, w); - - math::ColumnVector r{ 9, 11, 28 }; - - testing::ProcessTest(implementationName + "::MultiplyScaleAddUpdate(scalar, Matrix, Vector, scalar, Vector)", u == r && w == r); -} - -template -void TestVectorMatrixMultiplyScaleAddUpdate() -{ - auto implementationName = 
math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix M{ - { 1, 0 }, - { 0, 1 }, - { 2, 2 } - }; - - math::Matrix N{ - { 1, 0, 3, 1 }, - { 0, 1, 0, -1 }, - { 2, 0, 1, 3 }, - { 2, 2, 2, 3 } - }; - - math::RowVector u{ 1, 1, 0 }; - math::RowVector v{ 3, 4 }; - math::RowVector w{ 3, 4 }; - - ElementType s = 2; - ElementType t = 3; - - math::MultiplyScaleAddUpdate(s, u, M, t, v); - math::MultiplyScaleAddUpdate(s, u, N.GetSubMatrix(1, 1, 3, 2), t, w); - - math::RowVector r{ 11, 14 }; - - testing::ProcessTest(implementationName + "::MultiplyScaleAddUpdate(scalar, Vector, Matrix, scalar, Vector)", v == r && w == r); -} - -template -void TestMatrixMatrixMultiplyScaleAddUpdate() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - math::Matrix A{ - { 1, 2 }, - { 3, 1 }, - { 2, 0 } - }; - - // a padded version of A - math::Matrix AA{ - { 1, 1, 1, 1 }, - { 1, 1, 2, 1 }, - { 1, 3, 1, 1 }, - { 1, 2, 0, 1 }, - { 1, 1, 1, 1 } - }; - - math::Matrix B{ - { 3, 4, 5, 6 }, - { 8, 9, 10, 11 } - }; - - // A padded version of B - math::Matrix BB{ - { 1, 1, 1, 1, 1, 1 }, - { 1, 3, 4, 5, 6, 1 }, - { 1, 8, 9, 10, 11, 1 }, - { 1, 1, 1, 1, 1, 1 } - }; - - math::Matrix C(A.NumRows(), B.NumColumns()); - C.Fill(1); - math::MultiplyScaleAddUpdate(static_cast(1), A, B, static_cast(-1), C); - - math::Matrix CC(A.NumRows() + 2, B.NumColumns() + 2); - CC.Fill(1); - auto CCC = CC.GetSubMatrix(1, 1, 3, 4); - math::MultiplyScaleAddUpdate(static_cast(1), AA.GetSubMatrix(1, 1, 3, 2), BB.GetSubMatrix(1, 1, 2, 4), static_cast(-1), CCC); - - math::Matrix R{ - { 18, 21, 24, 27 }, - { 16, 20, 24, 28 }, - { 5, 7, 9, 11 } - }; - - testing::ProcessTest(implementationName + "::MultiplyScaleAddUpdate(scalar, Matrix, Matrix, scalar, Matrix)", C == R && CCC == R); -} - -template -void TestMatrixElementwiseMultiplySet() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::Matrix N{ - { -1, 1, -1 }, - { 1, 1, 2 } - }; - - math::Matrix C(2, 3); - - math::ElementwiseMultiplySet(M, N, C); - - math::RowMatrix R{ - { -1, 2, 0 }, - { 0, 3, 14 } - }; - - testing::ProcessTest("ElementwiseMultiplySet(Matrix, Matrix, Matrix)", C == R); -} - -template -void TestMatrixRowwiseCumulativeSumUpdate() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::RowwiseCumulativeSumUpdate(M); - - math::Matrix R{ - { 1, 3, 3 }, - { 0, 3, 10 } - }; - - testing::ProcessTest("RowwiseCumulativeSumUpdate(Matrix)", M == R); -} - -template -void TestMatrixColumnwiseCumulativeSumUpdate() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::ColumnwiseCumulativeSumUpdate(M); - - math::Matrix R{ - { 1, 2, 0 }, - { 1, 5, 7 } - }; - - testing::ProcessTest("ColumnwiseCumulativeSumUpdate(Matrix)", M == R); -} - -template -void TestMatrixRowwiseConsecutiveDifferenceUpdate() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::RowwiseConsecutiveDifferenceUpdate(M); - - math::Matrix R{ - { 1, 1, -2 }, - { 0, 3, 4 } - }; - - testing::ProcessTest("RowwiseConsecutiveDifferenceUpdate(Matrix)", M == R); -} - -template -void TestMatrixColumnwiseConsecutiveDifferenceUpdate() -{ - math::Matrix M{ - { 1, 2, 0 }, - { 0, 3, 7 } - }; - - math::ColumnwiseConsecutiveDifferenceUpdate(M); - - math::Matrix R{ - { 1, 2, 0 }, - { -1, 1, 7 } - }; - - testing::ProcessTest("ColumnwiseConsecutiveDifferenceUpdate(Matrix)", M == R); -} - -template -void TestMatrixArchiver() -{ - math::Matrix M(3, 4); - M(0, 0) = 1; - M(0, 2) = 4; - M(2, 3) = 7; - - utilities::SerializationContext context; - 
std::stringstream strstream; - utilities::JsonArchiver archiver(strstream); - - math::MatrixArchiver::Write(M, "test", archiver); - utilities::JsonUnarchiver unarchiver(strstream, context); - - math::Matrix Ma(0, 0); - math::MatrixArchiver::Read(Ma, "test", unarchiver); - - testing::ProcessTest("MatrixArchiver", Ma == M); -} diff --git a/libraries/math/test/tcc/Tensor_test.tcc b/libraries/math/test/tcc/Tensor_test.tcc deleted file mode 100644 index 6702ab361..000000000 --- a/libraries/math/test/tcc/Tensor_test.tcc +++ /dev/null @@ -1,807 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Tensor_test.tcc (math_test) -// Authors: Ofer Dekel, Kern Handa -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -#include - -// stl -#include // rand - -template -void TestTensorIndexer() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } - }; - - auto S = T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }); - - T(1, 2, 3) = 7; - T(0, 1, 2) = 8; - - auto R1 = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 8, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 7 } } - }; - - auto R2 = math::Tensor{ - { { 8, 4 }, { 3, 4 } }, - { { 3, 4 }, { 3, 7 } } - }; - - testing::ProcessTest("Tensor::operator()", T == R1 && S == R2); -} - -template -void TestTensorSize() -{ - math::Tensor T(10, 20, 30); - auto S = T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }); - - testing::ProcessTest("Tensor::Size", T.Size() == 10 * 20 * 30 && S.Size() == 2 * 2 * 2); -} - -template -void TestTensorNumRows() -{ - math::Tensor T(10, 20, 30); - - testing::ProcessTest("Tensor::NumRows", T.NumRows() == 10); -} - -template -void TestTensorNumColumns() -{ - math::Tensor T(10, 20, 30); - - testing::ProcessTest("Tensor::NumColumns", T.NumColumns() == 20); -} - -template -void TestTensorNumChannels() -{ - math::Tensor T(10, 20, 30); - - testing::ProcessTest("Tensor::NumChannels", T.NumChannels() == 30); -} - -template -void TestTensorGetShape() -{ - math::Tensor T(10, 20, 30); - auto shape = T.GetShape(); - - testing::ProcessTest("Tensor::GetShape", shape == math::TensorShape{ 10, 20, 30 }); -} - -template -void TestTensorNumSlices() -{ - math::ColumnRowChannelTensor T(10, 20, 30); - math::ChannelColumnRowTensor S(10, 20, 30); - - testing::ProcessTest("Tensor::NumSlices", - math::NumSlices(T) == 30 && math::NumSlices(T) == 30 && math::NumSlices(T) == 10 && math::NumSlices(T) == 10 && math::NumSlices(S) == 20 && math::NumSlices(S) == 20 && math::NumSlices(S) == 10 && math::NumSlices(S) == 10); - - auto test1DNumSlices = [](auto T) { - testing::ProcessTest("Tensor::NumSlices", - math::NumSlices(T) == (10 * 20) && math::NumSlices(T) == (10 * 30) && math::NumSlices(T) == (20 * 30)); - }; - test1DNumSlices(T); - test1DNumSlices(S); -} - -template -void TestTensorNumPrimarySlices() -{ - math::ColumnRowChannelTensor T(10, 20, 30); - math::ChannelColumnRowTensor S(10, 20, 30); - - testing::ProcessTest("Tensor::NumPrimarySlices", T.NumPrimarySlices() == 30 && S.NumPrimarySlices() == 10); -} - -template -void TestTensorIsEqual() -{ - auto S = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } - }; - - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 
3, 4 } } - }; - - testing::ProcessTest("Tensor::IsEqual", S.IsEqual(T) && T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).IsEqual(S.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }))); -} - -template -void TestTensorEqualityOperator() -{ - auto S = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } - }; - - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } - }; - - testing::ProcessTest("Tensor::operator==", T == S && T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) == S.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 })); -} - -template -void TestTensorInequalityOoperator() -{ - auto S = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } - }; - - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 8, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } - }; - - auto U = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } - }; - - testing::ProcessTest("Tensor::operator!=", T != S && T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) != S.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) && T != U); -} - -template -void TestTensorGetConstReference() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } - }; - - auto S = T.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }); - - testing::ProcessTest("Tensor::operator==", T == T.GetConstReference() && S == S.GetConstReference()); -} - -template -void TestTensorGetSubTensor() -{ - auto T = math::Tensor(4, 6, 8); - auto subT = T.GetSubTensor({ 1, 2, 3 }, { 2, 3, 4 }); - subT.Fill(1); - - auto S = math::Tensor(4, 6, 8); - for (size_t i = 1; i < 3; ++i) - { - for (size_t j = 2; j < 5; ++j) - { - for (size_t k = 3; k < 7; ++k) - { - S(i, j, k) = 1; - } - } - } - - testing::ProcessTest("TestGetSubTensor()", T == S); -} - -template -void TestTensorGetSlice() -{ - math::ColumnRowChannelTensor T1(3, 4, 5); - T1(0, 0, 0) = 1; - T1(1, 2, 3) = 2; - T1(0, 3, 3) = 3; - T1(2, 2, 4) = 3; - - auto T1Test2DSlice = [](auto T) { - auto M1 = math::GetSlice(T, 3); - testing::ProcessTest("TensorReference::GetSlice()", M1(2, 1) == 2 && M1(3, 0) == 3); - - auto M2 = math::GetSlice(T, 3); - testing::ProcessTest("TensorReference::GetSlice()", M2(1, 2) == 2 && M2(0, 3) == 3); - - auto M3 = math::GetSlice(T, 0); - testing::ProcessTest("TensorReference::GetSlice()", M3(0, 0) == 1 && M3(3, 3) == 3); - - auto M4 = math::GetSlice(T, 0); - testing::ProcessTest("TensorReference::GetSlice()", M4(0, 0) == 1 && M4(3, 3) == 3); - }; - - T1Test2DSlice(T1); - T1Test2DSlice(T1.GetConstReference()); - - math::ChannelColumnRowTensor T2(3, 4, 5); - T2(0, 0, 0) = 1; - T2(1, 2, 3) = 2; - T2(0, 3, 3) = 3; - T2(2, 2, 4) = 4; - - auto T2Test2DSlice = [](auto T) { - auto M1 = math::GetSlice(T, 0); - testing::ProcessTest("TensorReference::GetSlice()", M1(0, 0) == 1 && M1(3, 3) == 3); - - auto M2 = math::GetSlice(T, 0); - testing::ProcessTest("TensorReference::GetSlice()", M2(0, 0) == 1 && M2(3, 3) == 3); - - auto M3 = math::GetSlice(T, 2); - testing::ProcessTest("TensorReference::GetSlice()", M3(1, 3) == 2 && M3(2, 4) == 4); - - auto M4 = math::GetSlice(T, 2); - testing::ProcessTest("TensorReference::GetSlice()", M4(3, 1) == 2 && M4(4, 2) == 4); - }; - - T2Test2DSlice(T2); - T2Test2DSlice(T2.GetConstReference()); - - auto vectorSliceTest = [](auto _) { - using TensorType = decltype(_); - - // T = numpy.arange(5 
* 7 * 11).reshape(5, 7, 11) - TensorType T(5, 7, 11); - for (unsigned i = 0; i < 5; ++i) - { - for (unsigned j = 0; j < 7; ++j) - { - for (unsigned k = 0; k < 11; ++k) - { - T(i, j, k) = static_cast(k + j * 11 + i * 77); - } - } - } - - auto test1DGetSlice = [](auto T) { - // equivalent of NumPy's T[4, 6, ...] - auto V1 = math::GetSlice(T, 4, 6); - testing::ProcessTest("TensorReference::GetSlice()", V1 == math::ColumnVector({ 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384 })); - - // equivalent of NumPy's T[4, ..., 8] - auto V2 = math::GetSlice(T, 4, 8); - testing::ProcessTest("TensorReference::GetSlice()", V2 == math::ColumnVector({ 316, 327, 338, 349, 360, 371, 382 })); - - // equivalent of NumPy's T[..., 6, 8] - auto V3 = math::GetSlice(T, 6, 8); - testing::ProcessTest("TensorReference::GetSlice()", V3 == math::ColumnVector({ 74, 151, 228, 305, 382 })); - }; - - test1DGetSlice(T); - test1DGetSlice(T.GetConstReference()); - - typename TensorType::TensorElementType originalElementVal = 0; - - // T[..., 6, 8][0] = 0 - auto V1 = math::GetSlice(T, 4, 6); - std::swap(originalElementVal, V1[0]); - testing::ProcessTest("TensorReference::GetSlice() after modification", V1 == math::ColumnVector({ 0, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384 })); - testing::ProcessTest("T(4, 6, 0) == 0", T(4, 6, 0) == 0); - std::swap(originalElementVal, V1[0]); - - // T[4..., 8][0] = 0 - auto V2 = math::GetSlice(T, 4, 8); - std::swap(originalElementVal, V2[0]); - testing::ProcessTest("TensorReference::GetSlice() after modification", V2 == math::ColumnVector({ 0, 327, 338, 349, 360, 371, 382 })); - testing::ProcessTest("T(4, 0, 8) == 0", T(4, 0, 8) == 0); - std::swap(originalElementVal, V2[0]); - - // T[4, 6, ...][0] = 0 - auto V3 = math::GetSlice(T, 6, 8); - std::swap(originalElementVal, V3[0]); - testing::ProcessTest("TensorReference::GetSlice() after modification", V3 == math::ColumnVector({ 0, 151, 228, 305, 382 })); - testing::ProcessTest("T(0, 6, 8) == 0", T(0, 6, 8) == 0); - std::swap(originalElementVal, V3[0]); - }; - - vectorSliceTest(math::ChannelColumnRowTensor{}); - vectorSliceTest(math::ColumnRowChannelTensor{}); -} - -template -void TestTensorGetPrimarySlice() -{} - -template -void TestTensorReferenceAsVector() -{ - math::ChannelColumnRowTensor T(3, 4, 2); - T(0, 0, 0) = 1; - T(0, 0, 1) = 2; - T(0, 1, 0) = 3; - T(0, 1, 1) = 4; - math::ColumnRowChannelTensor S(T); - - auto u = T.ReferenceAsVector(); - auto v = S.ReferenceAsVector(); - - math::RowVector r1{ 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - math::RowVector r2{ 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - testing::ProcessTest("TensorReference::ReferenceAsVector()", u == r1 && v == r2); -} - -template -void TestTensorReferenceAsMatrix() -{ - math::ChannelColumnRowTensor T(3, 4, 2); - T(0, 0, 0) = 1; - T(0, 0, 1) = 2; - T(0, 1, 0) = 3; - T(0, 1, 1) = 4; - math::ColumnRowChannelTensor S(T); - - auto M = T.ReferenceAsMatrix(); - auto N = S.ReferenceAsMatrix(); - - math::RowMatrix R1{ - { 1, 2, 3, 4, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0 } - }; - - math::RowMatrix R2{ - { 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } - }; - - testing::ProcessTest("TensorReference::ReferenceAsMatrix", M == R1 && N == R2); -} - -template -void TestTensorReferenceAsMatrixCopy() -{ - math::ChannelColumnRowTensor T(2, 4, 1); - float x = 1; - for (size_t i = 0; i < 2; i++) - { - for (size_t j = 0; j < 4; j++) - { - T(i, j, 0) = 
x++; - } - } - - math::RowMatrix E{ - { 1, 5 }, - { 2, 6 }, - { 3, 7 }, - { 4, 8 } - }; - - auto r = T.GetConstReference(); - - auto result = math::RowMatrix(r.ReferenceAsMatrix().Transpose()); - - testing::ProcessTest("TensorReference::ReferenceAsMatrix.Transpose and copy", result.IsEqual(E)); -} - -template -void TestTensorCopyFrom() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - math::Tensor S(2, 3, 4); - S.CopyFrom(T); - - math::Tensor S2(2, 3, 4); - S2.CopyFrom(T); - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - auto N = math::Tensor{ - { { 5, 6 }, { 9, 0 } }, - { { 4, 5 }, { 7, 8 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).CopyFrom(N); - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 5, 6 }, { 9, 0, 9, 0 } }, - { { 3, 4, 5, 6 }, { 7, 8, 4, 5 }, { 1, 2, 7, 8 } } - }; - - testing::ProcessTest("TensorReference::CopyFrom", S == T && S2 == T && M == R); -} - -template -void TestTensorReset() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - T.Reset(); - - math::Tensor S(2, 3, 4); - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).Reset(); - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 0, 0 }, { 9, 0, 0, 0 } }, - { { 3, 4, 5, 6 }, { 7, 8, 0, 0 }, { 1, 2, 0, 0 } } - }; - - testing::ProcessTest("TensorReference::Reset", S == T && M == R); -} - -template -void TestTensorFill() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - T.Fill(3); - - auto S = math::Tensor{ - { { 3, 3, 3, 3 }, { 3, 3, 3, 3 }, { 3, 3, 3, 3 } }, - { { 3, 3, 3, 3 }, { 3, 3, 3, 3 }, { 3, 3, 3, 3 } } - }; - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).Fill(3); - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 3, 3 }, { 9, 0, 3, 3 } }, - { { 3, 4, 5, 6 }, { 7, 8, 3, 3 }, { 1, 2, 3, 3 } } - }; - - testing::ProcessTest("TensorReference::Fill", S == T && M == R); -} - -template -void TestTensorGenerate() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - T.Generate([]() -> ElementType { return 3; }); - - auto S = math::Tensor{ - { { 3, 3, 3, 3 }, { 3, 3, 3, 3 }, { 3, 3, 3, 3 } }, - { { 3, 3, 3, 3 }, { 3, 3, 3, 3 }, { 3, 3, 3, 3 } } - }; - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).Generate([]() -> ElementType { return 3; }); - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 3, 3 }, { 9, 0, 3, 3 } }, - { { 3, 4, 5, 6 }, { 7, 8, 3, 3 }, { 1, 2, 3, 3 } } - }; - - testing::ProcessTest("TensorReference::Generate", S == T && M == R); -} - -template -void TestTensorTransform() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - T.Transform([](ElementType x) { return 2 * x; }); - - auto S = math::Tensor{ - { { 2, 4, 6, 8 }, { 10, 12, 14, 16 }, { 
18, 0, 2, 4 } }, - { { 6, 8, 10, 12 }, { 14, 16, 18, 0 }, { 2, 4, 6, 8 } } - }; - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }).Transform([](ElementType x) { return 2 * x; }); - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 14, 16 }, { 9, 0, 2, 4 } }, - { { 3, 4, 5, 6 }, { 7, 8, 18, 0 }, { 1, 2, 6, 8 } } - }; - - testing::ProcessTest("TensorReference::Transform", S == T && M == R); -} - -template -void TestTensorPlusEqualsOperator() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - T += 2; - - auto S = math::Tensor{ - { { 3, 4, 5, 6 }, { 7, 8, 9, 10 }, { 11, 2, 3, 4 } }, - { { 5, 6, 7, 8 }, { 9, 10, 11, 2 }, { 3, 4, 5, 6 } } - }; - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) += 2; - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 9, 10 }, { 9, 0, 3, 4 } }, - { { 3, 4, 5, 6 }, { 7, 8, 11, 2 }, { 1, 2, 5, 6 } } - }; - - testing::ProcessTest("TensorReference::operator+=", S == T && M == R); -} - -template -void TestTensorMinusEqualsOperator() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - T -= -2; - - auto S = math::Tensor{ - { { 3, 4, 5, 6 }, { 7, 8, 9, 10 }, { 11, 2, 3, 4 } }, - { { 5, 6, 7, 8 }, { 9, 10, 11, 2 }, { 3, 4, 5, 6 } } - }; - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) -= -2; - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 9, 10 }, { 9, 0, 3, 4 } }, - { { 3, 4, 5, 6 }, { 7, 8, 11, 2 }, { 1, 2, 5, 6 } } - }; - - testing::ProcessTest("TensorReference::operator-=", S == T && M == R); -} - -template -void TestTensorTimesEqualsOperator() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - T *= 2; - - auto S = math::Tensor{ - { { 2, 4, 6, 8 }, { 10, 12, 14, 16 }, { 18, 0, 2, 4 } }, - { { 6, 8, 10, 12 }, { 14, 16, 18, 0 }, { 2, 4, 6, 8 } } - }; - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) *= 2; - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 14, 16 }, { 9, 0, 2, 4 } }, - { { 3, 4, 5, 6 }, { 7, 8, 18, 0 }, { 1, 2, 6, 8 } } - }; - - testing::ProcessTest("TensorReference::operator*=", S == T && M == R); -} - -template -void TestTensorDivideEqualsOperator() -{ - auto T = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - T /= 0.5; - - auto S = math::Tensor{ - { { 2, 4, 6, 8 }, { 10, 12, 14, 16 }, { 18, 0, 2, 4 } }, - { { 6, 8, 10, 12 }, { 14, 16, 18, 0 }, { 2, 4, 6, 8 } } - }; - - auto M = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 0, 1, 2 } }, - { { 3, 4, 5, 6 }, { 7, 8, 9, 0 }, { 1, 2, 3, 4 } } - }; - - M.GetSubTensor({ 0, 1, 2 }, { 2, 2, 2 }) /= 0.5; - - auto R = math::Tensor{ - { { 1, 2, 3, 4 }, { 5, 6, 14, 16 }, { 9, 0, 2, 4 } }, - { { 3, 4, 5, 6 }, { 7, 8, 18, 0 }, { 1, 2, 6, 8 } } - }; - - testing::ProcessTest("TensorReference::operator/=", S == T && 
M == R); -} - -template -void TestTensorVectorAddUpdate() -{ - auto T = math::Tensor(2, 3, 4); - - auto v1 = math::Vector{ 1, 2 }; - math::AddUpdate(v1, T); - auto R1 = math::Tensor{ { { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 } }, - { { 2, 2, 2, 2 }, { 2, 2, 2, 2 }, { 2, 2, 2, 2 } } }; - testing::ProcessTest("void TestTensorVectorAddUpdate()", T == R1); - - T.Fill(0); - auto v2 = math::Vector{ 1, 2, 3 }; - math::AddUpdate(v2, T); - auto R2 = math::Tensor{ { { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 3, 3, 3, 3 } }, - { { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 3, 3, 3, 3 } } }; - testing::ProcessTest("void TestTensorVectorAddUpdate()", T == R2); - - T.Fill(0); - auto v3 = math::Vector{ 1, 2, 3, 4 }; - math::AddUpdate(v3, T); - auto R3 = math::Tensor{ { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } }; - testing::ProcessTest("void TestTensorVectorAddUpdate()", T == R3); - - // subtensors - auto TT = math::Tensor(10, 10, 10); - auto TR = TT.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); - - TR.Fill(0); - math::AddUpdate(v1, TR); - testing::ProcessTest("void TestTensorVectorAddUpdate() with subtensor", TR == R1); - - TR.Fill(0); - math::AddUpdate(v2, TR); - testing::ProcessTest("void TestTensorVectorAddUpdate() with subtensor", TR == R2); - - TR.Fill(0); - math::AddUpdate(v3, TR); - testing::ProcessTest("void TestTensorVectorAddUpdate() with subtensor", TR == R3); -} - -template -void TestTensorVectorMultiply() -{ - auto implementationName = math::Internal::MatrixOperations::GetImplementationName(); - - auto T1 = math::Tensor(2, 3, 4); - T1.Fill(1); - auto v1 = math::Vector{ 1, 2 }; - math::ScaleUpdate(v1, T1); - auto R1 = math::Tensor{ { { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 } }, - { { 2, 2, 2, 2 }, { 2, 2, 2, 2 }, { 2, 2, 2, 2 } } }; - - auto T2 = math::Tensor(2, 3, 4); - T2.Fill(1); - auto v2 = math::Vector{ 1, 2, 3 }; - math::ScaleUpdate(v2, T2); - auto R2 = math::Tensor{ { { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 3, 3, 3, 3 } }, - { { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 3, 3, 3, 3 } } }; - - auto T3 = math::Tensor(2, 3, 4); - T3.Fill(1); - auto v3 = math::Vector{ 1, 2, 3, 4 }; - math::ScaleUpdate(v3, T3); - auto R3 = math::Tensor{ { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } }, - { { 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 1, 2, 3, 4 } } }; - - // subtensors - auto S1 = math::Tensor(10, 10, 10); - auto M1 = S1.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); - M1.Fill(1); - math::ScaleUpdate(v1, M1); - - auto S2 = math::Tensor(10, 10, 10); - auto M2 = S2.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); - M2.Fill(1); - math::ScaleUpdate(v2, M2); - - auto S3 = math::Tensor(10, 10, 10); - auto M3 = S3.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); - M3.Fill(1); - math::ScaleUpdate(v3, M3); - - testing::ProcessTest(implementationName + "::Multiply(Vector, Tensor)", T1 == R1 && T2 == R2 && T3 == R3 && M1 == R1 && M2 == R2 && M3 == R3); -} - -template -void TestTensorVectorScaleAddUpdate() -{ - auto T = math::Tensor(2, 3, 4); - T.Fill(1); - auto s1 = math::Vector{ 1, 2 }; - auto b1 = math::Vector{ 3, 4 }; - math::ScaleAddUpdate(s1, b1, T); - auto R1 = math::Tensor{ { { 4, 4, 4, 4 }, { 4, 4, 4, 4 }, { 4, 4, 4, 4 } }, - { { 6, 6, 6, 6 }, { 6, 6, 6, 6 }, { 6, 6, 6, 6 } } }; - testing::ProcessTest("void TestTensorVectorScaleAddUpdate()", T == R1); - - T.Fill(1); - auto s2 = math::Vector{ 1, 2, 3 }; - auto b2 = math::Vector{ 4, 5, 6 }; - math::ScaleAddUpdate(s2, b2, T); - auto R2 = math::Tensor{ { { 5, 5, 5, 5 }, { 7, 7, 7, 7 }, { 9, 9, 9, 9 } }, - { { 5, 5, 5, 5 }, { 7, 7, 7, 7 }, { 9, 9, 9, 9 
} } }; - testing::ProcessTest("void TestTensorVectorScaleAddUpdate()", T == R2); - - T.Fill(1); - auto s3 = math::Vector{ 1, 2, 3, 4 }; - auto b3 = math::Vector{ 1, 1, 2, 2 }; - math::ScaleAddUpdate(s3, b3, T); - auto R3 = math::Tensor{ { { 2, 3, 5, 6 }, { 2, 3, 5, 6 }, { 2, 3, 5, 6 } }, - { { 2, 3, 5, 6 }, { 2, 3, 5, 6 }, { 2, 3, 5, 6 } } }; - testing::ProcessTest("void TestTensorVectorScaleAddUpdate()", T == R3); - - // subtensors - auto TT = math::Tensor(10, 10, 10); - auto TR = TT.GetSubTensor({ 5, 3, 1 }, { 2, 3, 4 }); - - TR.Fill(1); - math::ScaleAddUpdate(s1, b1, TR); - testing::ProcessTest("void TestTensorVectorScaleAddUpdate() with subtensor", TR == R1); - - TR.Fill(1); - math::ScaleAddUpdate(s2, b2, TR); - testing::ProcessTest("void TestTensorVectorScaleAddUpdate() with subtensor", TR == R2); - - TR.Fill(1); - math::ScaleAddUpdate(s3, b3, TR); - testing::ProcessTest("void TestTensorVectoScaleAddUpdate() with subtensor", TR == R3); -} - -template -void TestTensorArchiver() -{ - math::Tensor T(10, 20, 30); - - T(3, 2, 1) = 2.0; - T(4, 3, 2) = 3.0; - T(3, 3, 3) = 4.0; - - utilities::SerializationContext context; - std::stringstream strstream; - utilities::JsonArchiver archiver(strstream); - - math::TensorArchiver::Write(T, "test", archiver); - utilities::JsonUnarchiver unarchiver(strstream, context); - - math::Tensor Ta(0, 0, 0); - math::TensorArchiver::Read(Ta, "test", unarchiver); - testing::ProcessTest("void TestTensorArchiver(), write and read tensor", Ta == T); -} diff --git a/libraries/math/test/tcc/Vector_test.tcc b/libraries/math/test/tcc/Vector_test.tcc deleted file mode 100644 index 207f2ad65..000000000 --- a/libraries/math/test/tcc/Vector_test.tcc +++ /dev/null @@ -1,1014 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Vector_test.tcc (math_test) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -#include - -#include - -#include - -template -void TestVectorIndexer() -{ - math::RowVector v{ 1, 2, 3, 4, 5, 6, 7 }; - auto u = v.GetSubVector(2, 2); - - math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; - math::ColumnMatrix N(M); - auto w = M.GetRow(1); - auto z = N.GetRow(1); - - testing::ProcessTest("Vector::Operator[]", v[0] == 1 && v[1] == 2 && v[6] == 7 && u[0] == 3 && u[1] == 4 && w[0] == 4 && w[1] == 5 && w[2] == 6 && z[0] == 4 && z[1] == 5 && z[2] == 6); -} - -template -void TestVectorSize() -{ - math::RowVector u{}; - math::RowVector v{ 1, 2, 3, 4, 5, 6, 7 }; - auto w = v.GetSubVector(2, 3); - - testing::ProcessTest("Vector::Size", v.Size() == 7 && u.Size() == 0 && w.Size() == 3); -} - -template -void TestVectorGetDataPointer() -{ - math::RowVector v{ 1, 2, 3, 4, 5, 6, 7 }; - auto u = v.GetSubVector(2, 2); - - testing::ProcessTest("Vector::GetDataPointer", &(v[0]) == v.GetDataPointer() && v.GetDataPointer() + 2 == u.GetDataPointer()); -} - -template -void TestVectorGetIncrement() -{ - math::RowVector v{ 1, 2, 3, 4, 5, 6, 7 }; - auto u = v.GetSubVector(2, 2); - - math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; - math::ColumnMatrix N(M); - auto w = M.GetRow(1); - auto z = N.GetRow(1); - - testing::ProcessTest("Vector::GetIncrement", v.GetIncrement() == 1 && u.GetIncrement() == 1 && w.GetIncrement() == 1 && z.GetIncrement() == 3); -} - -template -void TestVectorNorm0() -{ - math::RowVector x{ 0, 1, 0, -2, 0 }; - auto v = x.GetSubVector(2, 
2); - - testing::ProcessTest("Vector::Norm0", x.Norm0() == 2 && v.Norm0() == 1); -} - -template -void TestVectorNorm1() -{ - math::RowVector x{ 0, 1, 0, -2, 0 }; - auto v = x.GetSubVector(2, 2); - - testing::ProcessTest("Vector::Norm1", x.Norm1() == 3 && v.Norm1() == 2); -} - -template -void TestVectorNorm2() -{ - math::RowVector x{ 0, 1, 0, -2, 0 }; - auto v = x.GetSubVector(2, 2); - - testing::ProcessTest("Vector::Norm2", testing::IsEqual(x.Norm2(), static_cast(std::sqrt(5))) && v.Norm2() == 2); -} - -template -void TestVectorNorm2Squared() -{ - math::RowVector x{ 0, 1, 0, -2, 0 }; - auto v = x.GetSubVector(2, 2); - - testing::ProcessTest("Vector::Norm2Squared", x.Norm2Squared() == 5 && v.Norm2Squared() == 4); -} - -template -void TestVectorToArray() -{ - std::vector r0{ 41, 47, 53, 59 }; - std::vector r1{ 15, 25, 23, 33 }; - - math::RowVector p(r0); - math::ColumnVector q(r1); - - math::Matrix A{ - { 41, 47, 53, 59 }, - { 40, 45, 56, 61 }, - { 15, 25, 23, 33 }, - }; - std::vector r(A.GetRow(0).ToArray()); - std::vector s(A.GetRow(2).ToArray()); - - math::Matrix B(A); - std::vector t(B.GetRow(0).ToArray()); - std::vector u(B.GetRow(2).ToArray()); - - testing::ProcessTest("Vector::ToArray", p.ToArray() == r0 && q.ToArray() == r1 && r == r0 && s == r1 && t == r0 && u == r1); -} - -template -void TestVectorEqualityOperator() -{ - math::Vector u{ 1, 2, 3, 4, 5 }; - math::Vector v{ 1, 2, 3, 4, 5 }; - - testing::ProcessTest("Vector::operator==", u == v); -} - -template -void TestVectorInequalityOperator() -{ - math::Vector u{ 1, 2, 3, 4, 5 }; - math::Vector v{ 1, 2, 3, 4, 5 }; - math::Vector w{ -1, 2, 3, 4, 5 }; - math::Vector z{ 1, 2, 3, 4 }; - - testing::ProcessTest("Vector::operator!=", u != w && u != v.Transpose() && u != z); -} - -template -void TestVectorGetConstReference() -{ - math::Vector u{ 1, 2, 3, 4, 5 }; - auto v = u.GetConstReference(); - - testing::ProcessTest("Vector::GetConstReference", u == v); -} - -template -void TestVectorGetSubVector() -{ - math::Vector u{ 1, 2, 3, 4, 5 }; - auto v = u.GetSubVector(2, 2); - - math::Matrix A{ - { 41, 47, 53, 59 }, - { 40, 45, 56, 61 }, - { 15, 25, 23, 33 }, - }; - auto w = A.GetColumn(2); - auto z = w.GetSubVector(1, 2); - - testing::ProcessTest("Vector::GetSubVector", v[0] == 3 && v[1] == 4 && z[0] == 56 && z[1] == 23); -} - -template -void TestVectorTranspose() -{ - math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; - auto u = v.Transpose(); - math::Vector::value> w{ 1, 2, 3, 4, 5, 6, 7 }; - - auto x = v.GetSubVector(2, 3).Transpose(); - math::Vector::value> z{ 3, 4, 5 }; - - testing::ProcessTest("Vector::Transpose", u == w && x == z); -} - -template -void TestVectorSwap() -{ - math::RowVector v{ 1, 2, 3, 4, 5, 6, 7 }; - math::RowVector u{ -1, -2, -3, -4, -5, -6, -7 }; - math::RowVector s{ -1, -2, -3, -4, -5, -6, -7 }; - math::RowVector t{ 1, 2, 3, 4, 5, 6, 7 }; - v.Swap(u); - - testing::ProcessTest("Vector::Swap", v == s && u == t); -} - -template -void TestVectorCopyFrom() -{ - math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; - math::Vector u(7); - u.CopyFrom(v); - - math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; - math::ColumnVector x{ 11, 12, 13 }; - M.GetColumn(1).CopyFrom(x); - math::RowMatrix R{ { 1, 11, 3 }, { 4, 12, 6 }, { 7, 13, 9 } }; - - testing::ProcessTest("Vector::CopyFrom", u == v && M == R); -} - -template -void TestVectorReset() -{ - math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; - v.GetSubVector(1, 2).Reset(); - math::Vector r{ 1, 0, 0, 4, 5, 6, 7 }; - - math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; - 
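The CopyFrom and Reset tests rely on GetRow, GetColumn, and GetSubVector returning references that alias the parent's storage, so writes through the view mutate the matrix; the GetIncrement test above exposes the stride bookkeeping this requires (a row of a column-major 3x3 matrix has increment 3). A minimal standalone analogue of such a view, a sketch rather than ELL's actual vector reference type:

```cpp
#include <cstddef>

// A (pointer, size, increment) triple is enough to alias a row, column, or
// sub-vector of dense storage; mutating through it mutates the parent.
struct StridedView
{
    double* data;
    size_t size;
    size_t increment; // stride between consecutive logical elements

    double& operator[](size_t i) { return data[i * increment]; }

    void Reset()
    {
        for (size_t i = 0; i < size; ++i)
        {
            (*this)[i] = 0;
        }
    }
};

// For a 3x3 row-major matrix m[9], column 1 is StridedView{ m + 1, 3, 3 };
// calling Reset() on that view zeroes m[1], m[4], and m[7] in place.
```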
M.GetColumn(1).Reset(); - M.GetRow(1).Reset(); - math::RowMatrix R{ { 1, 0, 3 }, { 0, 0, 0 }, { 7, 0, 9 } }; - - testing::ProcessTest("Vector::Reset", v == r && M == R); -} - -template -void TestVectorFill() -{ - math::Vector v(10); - v.Fill(2); - math::Vector r{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; - - math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; - M.GetColumn(1).Fill(-1); - M.GetRow(1).Fill(1); - math::RowMatrix R{ { 1, -1, 3 }, { 1, 1, 1 }, { 7, -1, 9 } }; - - testing::ProcessTest("Vector::Fill", v == r && M == R); -} - -template -void TestVectorGenerate() -{ - math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; - v.GetSubVector(1, 2).Generate([]() -> ElementType { return -1; }); - math::Vector r{ 1, -1, -1, 4, 5, 6, 7 }; - - math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; - M.GetColumn(1).Generate([]() -> ElementType { return -1.0; }); - M.GetRow(1).Generate([]() -> ElementType { return 1.0; }); - math::RowMatrix R{ { 1, -1, 3 }, { 1, 1, 1 }, { 7, -1, 9 } }; - - testing::ProcessTest("Vector::Generate", v == r && M == R); -} - -template -void TestVectorTransform() -{ - math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; - v.Transform([](ElementType value) { return value * 2; }); - math::Vector u{ 2, 4, 6, 8, 10, 12, 14 }; - - math::RowMatrix M{ { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } }; - M.GetColumn(1).Transform([](ElementType value) { return value * 2; }); - math::ColumnVector w{ 4, 10, 16 }; - - math::Vector z{ 1, -2, 3, -4, -5, 6, -7 }; - z.Transform(math::AbsoluteValueTransformation); - math::Vector y{ 1, 2, 3, 4, 5, 6, 7 }; - - testing::ProcessTest("Vector::Transform", v == u && M.GetColumn(1) == w && z == y); -} - -template -void TestVectorResize() -{ - math::Vector v{ 1, 2, 3, 4, 5, 6, 7 }; - v.Resize(3); - math::Vector r{ 1, 2, 3 }; - - math::Vector u{ 1, 2, 3, 4, 5, 6, 7 }; - u.Resize(10); - math::Vector s{ 1, 2, 3, 4, 5, 6, 7, 0, 0, 0 }; - - testing::ProcessTest("Vector::Resize", v.Size() == 3 && v == r && u == s); -} - -template -void TestVectorPrint() -{ - std::stringstream stream; - math::Vector u{ 0, 2, 0, 4, 0, 0, 0 }; - math::Print(u, stream); - auto x = stream.str(); - - testing::ProcessTest("Print(Vector)", stream.str() == "{ 0, 2, 0, 4, 0, 0, 0 }"); -} - -template -void TestScalarVectorMultiply() -{ - math::Vector u{ 1, 2, 3, 4, 5 }; - math::Vector v{ 2, 0, -1, 0, 1 }; - u += 2 * v; - math::Vector r{ 5, 2, 1, 4, 7 }; - - testing::ProcessTest("scalar * Vector", u == r); -} - -template -void TestVectorSquare() -{ - math::Vector u(5); - math::Vector v{ 1, -1, 2, -2, 3 }; - u += Square(v); - math::Vector r{ 1, 1, 4, 4, 9 }; - - testing::ProcessTest("Square(Vector)", u == r); -} - -template -void TestVectorSqrt() -{ - math::Vector u(5); - math::Vector v{ 1, 1, 4, 4, 9 }; - u += Sqrt(v); - math::Vector r{ 1, 1, 2, 2, 3 }; - - math::Vector w{ 1, 1, 4, 4, 9 }; - math::TransformUpdate(math::SquareRootTransformation, w); - - testing::ProcessTest("Sqrt(Vector)", testing::IsEqual(u.ToArray(), r.ToArray()) && testing::IsEqual(w.ToArray(), r.ToArray())); -} - -template -void TestVectorAbs() -{ - math::Vector u(5); - math::Vector v{ 1, -1, 2, -2, 3 }; - u += Abs(v); - math::Vector r{ 1, 1, 2, 2, 3 }; - - math::Vector w{ 1, -1, 2, -2, 3 }; - math::TransformUpdate(math::AbsoluteValueTransformation, w); - - testing::ProcessTest("Abs(Vector)", w == r); -} - -template -void TestVectorPlusEqualsOperator() -{ - math::Vector v{ 1, -1, 2, -2, 3 }; - v += -2; - math::Vector r{ -1, -3, 0, -4, 1 }; - - testing::ProcessTest("Add(scalar, Vector)", v == r); -} - -template -void 
TestVectorMinusEqualsOperator() -{ - math::Vector v{ 1, -1, 2, -2, 3 }; - v -= 2; - math::Vector r{ -1, -3, 0, -4, 1 }; - - testing::ProcessTest("Add(scalar, Vector)", v == r); -} - -template -void TestVectorTimesEqualsOperator() -{ - math::Vector v{ 1, -1, 2, -2, 3 }; - v *= -2; - math::Vector r{ -2, 2, -4, 4, -6 }; - - testing::ProcessTest("Vector::operator*=", v == r); -} - -template -void TestVectorDivideEqualsOperator() -{ - math::Vector v{ 1, -1, 2, -2, 3 }; - v /= -0.5; - math::Vector r{ -2, 2, -4, 4, -6 }; - - testing::ProcessTest("Vector::operator/=", v == r); -} - -template -void TestVectorElementwiseMultiplySet() -{ - math::Vector u{ 1, 2, 3, 4, 5 }; - math::Vector v{ 2, 0, -1, 0, 1 }; - math::Vector w(5); - math::ElementwiseMultiplySet(u, v, w); - math::Vector r{ 2, 0, -3, 0, 5 }; - - testing::ProcessTest("ElementwiseMultiplySet(Vector, Vector)", w == r); -} - -template -void TestVectorVectorDot() -{ - math::Vector u{ 1, 2, 3, 4, 5 }; - math::Vector v{ 1, -1, 2, -2, 3 }; - auto result = math::Dot(u, v); - - testing::ProcessTest("Dot(Vector, Vector)", result == 12); -} - -template -void TestVectorVectorOuter() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - math::ColumnVector u{ 1, 2, 3 }; - math::RowVector v{ 1, -1 }; - math::Matrix A(3, 2); - - math::OuterProduct(u, v, A); - - math::ColumnMatrix B{ { 1, -1 }, { 2, -2 }, { 3, -3 } }; - testing::ProcessTest(implementationName + "::OuterProduct(Vector, Vector)", A == B); -} - -template -void TestVectorVectorInner() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - math::RowVector u{ 1, 2, 3, 4, 5 }; - math::ColumnVector v{ 1, -1, 2, -2, 3 }; - ElementType result; - math::InnerProduct(u, v, result); - - testing::ProcessTest(implementationName + "::InnerProduct(Vector, Vector)", result == 12); -} - -template -void TestVectorAddUpdateScalar() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3; - math::Vector u{ -2, 0, 1, 1 }; - math::AddUpdate(a, u); - - math::Vector w{ -2, 0, 1, 1 }; - math::ScaleAddUpdate(a, math::OnesVector(), static_cast(1), w); - - math::Vector r{ 1, 3, 4, 4 }; - testing::ProcessTest(implementationName + "::AddUpdate(scalar, Vector)", u == r && w == r); -} - -template -void TestVectorAddUpdateVector() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - math::Vector v{ 1, -1, 2, -2 }; - math::Vector u{ -2, 0, 1, 1 }; - math::AddUpdate(v, u); - - math::Vector w{ -2, 0, 1, 1 }; - math::ScaleAddUpdate(static_cast(1), v, static_cast(1), w); - - math::Vector r{ -1, -1, 3, -1 }; - testing::ProcessTest(implementationName + "::AddUpdate(Vector, Vector)", u == r && w == r); -} - -template -void TestVectorAddSetScalar() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - math::AddSet(a, u, z); - - math::Vector w(4); - math::ScaleAddSet(a, math::OnesVector(), static_cast(1), u, w); - - math::Vector r{ 1, 3, 4, 4 }; - testing::ProcessTest(implementationName + "::AddSet(scalar, Vector, output)", z == r && w == r); -} - -template -void TestVectorAddSetScalarZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 0.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - math::AddSet(a, u, z); - - math::Vector w(4); - math::ScaleAddSet(a, math::OnesVector(), 
static_cast(1), u, w); - - testing::ProcessTest(implementationName + "::AddSet(0.0, Vector, output)", z == u && w == u); -} - -template -void TestVectorAddSetScalarOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 1.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - math::AddSet(a, u, z); - - math::Vector w(4); - math::ScaleAddSet(a, math::OnesVector(), static_cast(1), u, w); - - math::Vector r{ -1, 1, 2, 2 }; - testing::ProcessTest(implementationName + "::AddSet(1.0, Vector, output)", z == r && w == r); -} - -template -void TestVectorAddSetVector() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - math::Vector v{ 1, -1, 2, -2 }; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::AddSet(v, u, z); - - math::Vector w(4); - math::ScaleAddSet(static_cast(1), v, static_cast(1), u, w); - - math::Vector r{ -1, -1, 3, -1 }; - testing::ProcessTest(implementationName + "::ScaleAddUpdate(1.0, Vector, 1.0, Vector)", z == r && w == r); -} - -template -void TestVectorScaleUpdate() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::ScaleUpdate(b, u); - - math::Vector r{ -4, 0, 2, 2 }; - testing::ProcessTest(implementationName + "::ScaleUpdate(scalar, Vector)", u == r); -} - -template -void TestVectorScaleUpdateZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType b = 0.0; - math::Vector u{ -2, 0, 1, 1 }; - math::ScaleUpdate(b, u); - - math::Vector r{ 0, 0, 0, 0 }; - testing::ProcessTest(implementationName + "::ScaleUpdate(0.0, Vector)", u == r); -} - -template -void TestVectorScaleUpdateOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType b = 1.0; - math::Vector u{ -2, 0, 1, 1 }; - math::ScaleUpdate(b, u); - - math::Vector r{ -2, 0, 1, 1 }; - testing::ProcessTest(implementationName + "::ScaleUpdate(1.0, Vector)", u == r); -} - -template -void TestVectorScaleSet() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - math::Vector v{ 1, -1, 2, -2 }; - math::Vector u{ -2, 0, 1, 1 }; - math::ScaleSet(a, v, u); - - math::Vector r{ 3, -3, 6, -6 }; - testing::ProcessTest(implementationName + "::ScaleSet(scalar, Vector, Vector)", u == r); -} - -template -void TestVectorScaleSetZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 0.0; - math::Vector v{ 1, -1, 2, -2 }; - math::Vector u{ 2, 0, 1, 1 }; - math::ScaleSet(a, v, u); - - math::Vector r{ 0, 0, 0, 0 }; - testing::ProcessTest(implementationName + "::ScaleSet(0.0, Vector, Vector)", u == r); -} - -template -void TestVectorScaleSetOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 1.0; - math::Vector v{ 1, -1, 2, -2 }; - math::Vector u{ -2, 0, 1, 1 }; - math::ScaleSet(a, v, u); - - math::Vector r{ 1, -1, 2, -2 }; - testing::ProcessTest(implementationName + "::ScaleSet(1.0, Vector, Vector)", u == r); -} - -template -void TestVectorScaleAddUpdateScalarVectorOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - math::Vector v{ 1, -1, 2, -2 }; - math::Vector u{ -2, 0, 1, 1 }; - math::ScaleAddUpdate(a, v, math::One(), u); - - math::Vector w{ -2, 0, 1, 1 }; - 
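In TestVectorScaleAddUpdateScalarVectorOne, continued below, math::One() is a tag argument that fixes the second scale factor to 1 at compile time, so the call reduces to an axpy, u = a*v + u; the test then checks it against the runtime-1 overload. Reference semantics as a sketch, not ELL's dispatch machinery:

```cpp
#include <cstddef>
#include <vector>

// u = a*v + u; what ScaleAddUpdate(a, v, One(), u) must compute.
void ScaleAddUpdateOneRef(double a, const std::vector<double>& v, std::vector<double>& u)
{
    for (size_t i = 0; i < v.size(); ++i)
    {
        u[i] += a * v[i];
    }
}
// a = 3, v = {1,-1,2,-2}, u = {-2,0,1,1} gives u = {1,-3,7,-5},
// the r asserted just below.
```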
math::ScaleAddUpdate(a, v, static_cast(1), w); - - math::Vector r{ 1, -3, 7, -5 }; - testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Vector, 1.0, Vector)", u == r && w == r); -} - -template -void TestVectorScaleAddUpdateScalarOnesScalar() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::ScaleAddUpdate(a, math::OnesVector(), b, u); - - math::Vector r{ -1, 3, 5, 5 }; - testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Ones, scalar, Vector)", u == r); -} - -template -void TestVectorScaleAddUpdateOneVectorScalar() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - math::Vector v{ 1, -1, 2, -2 }; - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::ScaleAddUpdate(math::One(), v, b, u); - - math::Vector w{ -2, 0, 1, 1 }; - math::ScaleAddUpdate(static_cast(1), v, b, w); - - math::Vector r{ -3, -1, 4, 0 }; - testing::ProcessTest(implementationName + "::ScaleAddUpdate(1.0, Vector, scalar, Vector)", u == r && w == r); -} - -template -void TestVectorScaleAddUpdateScalarVectorScalar() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - math::Vector v{ 1, -1, 2, -2 }; - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - - math::ScaleAddUpdate(a, v, b, u); - - math::Vector r{ -1, -3, 8, -4 }; - testing::ProcessTest(implementationName + "::ScaleAddUpdate(scalar, Vector, scalar, Vector)", u == r); -} - -template -void TestVectorScaleAddSetOnes() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ -1, 3, 5, 5 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, ones, scalar, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetOnesScalarZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - ElementType b = 0.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ 3, 3, 3, 3 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, ones, 0.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetOnesScalarOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - ElementType b = 1.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ 1, 3, 4, 4 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, ones, 1.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetOnesZeroScalar() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 0.0; - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ -4, 0, 2, 2 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, ones, scalar, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetOnesOneScalar() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 1.0; - 
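The TestVectorScaleAddSetOnes* family, continued here, pins down the math::OnesVector() overloads: ScaleAddSet(a, OnesVector(), b, u, z) is the broadcast z = a + b*u, with the all-ones operand never materialized, and the zero/one cases get dedicated tests because they typically dispatch to cheaper kernels. Reference semantics (sketch):

```cpp
#include <cstddef>
#include <vector>

// z = a + b*u; what ScaleAddSet(a, OnesVector(), b, u, z) must compute.
std::vector<double> ScaleAddOnesRef(double a, double b, const std::vector<double>& u)
{
    std::vector<double> z(u.size());
    for (size_t i = 0; i < u.size(); ++i)
    {
        z[i] = a + b * u[i];
    }
    return z;
}
// a = 1, b = 2, u = {-2,0,1,1} gives z = {-3,1,3,3},
// matching the OnesOneScalar case below.
```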
ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ -3, 1, 3, 3 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, ones, scalar, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetOnesZeroOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 0.0; - ElementType b = 1.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ -2, 0, 1, 1 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, ones, 1.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetOnesOneZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 1.0; - ElementType b = 0.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ 1, 1, 1, 1 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, ones, 0.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetOnesOneOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 1.0; - ElementType b = 1.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ -1, 1, 2, 2 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, ones, 1.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetOnesZeroZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 0.0; - ElementType b = 0.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, math::OnesVector(), b, u, z); - - math::Vector r{ 0, 0, 0, 0 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, ones, 0.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVector() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ -1, -3, 5, -1 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Vector, scalar, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVectorScalarZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 0.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ 3, -3, 3, -3 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Vector, 0.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVectorScalarOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 3.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 1.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ 1, -3, 4, -2 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(scalar, Vector, 1.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVectorZeroScalar() -{ - auto implementationName = 
math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 0.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ -4, 0, 2, 2 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, Vector, scalar, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVectorOneScalar() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 1.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 2.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ -3, -1, 3, 1 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, Vector, scalar, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVectorZeroOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 0.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 1.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ -2, 0, 1, 1 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, Vector, 1.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVectorOneZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 1.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 0.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ 1, -1, 1, -1 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, Vector, 0.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVectorOneOne() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 1.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 1.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ -1, -1, 2, 0 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(1.0, Vector, 1.0, Vector, output)", z == r); -} - -template -void TestVectorScaleAddSetVectorZeroZero() -{ - auto implementationName = math::Internal::VectorOperations::GetImplementationName(); - - ElementType a = 0.0; - math::Vector v{ 1, -1, 1, -1 }; - ElementType b = 0.0; - math::Vector u{ -2, 0, 1, 1 }; - math::Vector z(4); - - math::ScaleAddSet(a, v, b, u, z); - - math::Vector r{ 0, 0, 0, 0 }; - testing::ProcessTest(implementationName + "::ScaleAddSet(0.0, Vector, 0.0, Vector, output)", z == r); -} - -template -void TestVectorCumulativeSumUpdate() -{ - math::Vector v{ 1, -1, 3, 2 }; - math::CumulativeSumUpdate(v); - math::Vector r{ 1, 0, 3, 5 }; - testing::ProcessTest("CumulativeSumUpdate(Vector)", v == r); -} - -template -void TestVectorConsecutiveDifferenceUpdate() -{ - math::Vector v{ 1, -1, 3, 2 }; - math::ConsecutiveDifferenceUpdate(v); - math::Vector r{ 1, -2, 4, -1 }; - testing::ProcessTest("ConsecutiveDifferenceUpdate(Vector)", v == r); -} - -template -void TestVectorArchiver() -{ - math::Vector V{ 1, 2, 3, 4, 5, 5, 4, 3, 2, 1 }; - - utilities::SerializationContext context; - std::stringstream strstream; - utilities::JsonArchiver archiver(strstream); - - math::VectorArchiver::Write(V, "test", archiver); - utilities::JsonUnarchiver unarchiver(strstream, context); - - math::Vector Va(0); - math::VectorArchiver::Read(Va, "test", unarchiver); 
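TestVectorArchiver, like the matrix and tensor variants earlier in this diff, performs the same write-then-read round trip through a JSON archive. The shared pattern, factored into a helper for reference; a sketch that uses only calls appearing in these tests and assumes the relevant ell headers and namespaces are in scope:

```cpp
#include <sstream>

// Round-trip a vector through a JSON archive and return the deserialized copy.
template <typename VectorType>
VectorType RoundTripVector(const VectorType& original)
{
    utilities::SerializationContext context;
    std::stringstream stream;
    utilities::JsonArchiver archiver(stream);
    math::VectorArchiver::Write(original, "test", archiver);

    utilities::JsonUnarchiver unarchiver(stream, context);
    VectorType restored(0);
    math::VectorArchiver::Read(restored, "test", unarchiver);
    return restored; // callers assert restored == original
}
```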
- - testing::ProcessTest("VectorArchiver", Va == V); -} diff --git a/libraries/math/test/tcc/math_profile.tcc b/libraries/math/test/tcc/math_profile.tcc deleted file mode 100644 index a7b469692..000000000 --- a/libraries/math/test/tcc/math_profile.tcc +++ /dev/null @@ -1,201 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: math_profile.tcc (math_test) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include - -#include - -#include -#include -#include - -using namespace ell; - -template -double GetTime(Function function, size_t repetitions) -{ - // warm up - function(); - function(); - function(); - - // timed reps - auto start = std::chrono::high_resolution_clock::now(); - for (size_t t = 0; t < repetitions; ++t) - { - function(); - } - auto finish = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(finish - start).count(); - return static_cast(duration); -} - -void PrintLine(std::string functionName, double native, double singleBlas, double multiBlas) -{ - std::cout << functionName - << "\tnative:1.0\tsingleBlas:" << singleBlas / native - << "\tmultiBlas:" << multiBlas / native - << std::endl; -} - -template -void ProfileVectorScaleAddWorker(ElementType scalarA, VectorAType vectorA, ElementType scalarB, math::VectorReference vectorB, std::string description, size_t repetitions) -{ - double native = GetTime([&]() { math::ScaleAddUpdate(scalarA, vectorA, scalarB, vectorB); }, repetitions); - math::Blas::SetNumThreads(0); - double multiBlas = GetTime([&]() { math::ScaleAddUpdate(scalarA, vectorA, scalarB, vectorB); }, repetitions); - math::Blas::SetNumThreads(1); - double singleBlas = GetTime([&]() { math::ScaleAddUpdate(scalarA, vectorA, scalarB, vectorB); }, repetitions); - - std::string type = std::string("<") + typeid(ElementType).name() + ">"; - PrintLine("ScaleAddUpdate" + type + "(" + description + ", vector)", native, singleBlas, multiBlas); -} - -template -void ProfileVectorScaleAdd(size_t size, size_t repetitions, std::string seed) -{ - auto engine = utilities::GetRandomEngine(seed); - std::uniform_real_distribution uniform(-1, 1); - auto generator = [&]() { return uniform(engine); }; - - math::RowVector v(size); - v.Generate(generator); - - math::RowVector u(size); - u.Generate(generator); - - ElementType scalar = static_cast(-7.3); - ElementType one = 1.0; - - ProfileVectorScaleAddWorker(scalar, math::OnesVector(), one, u, "scalar, ones, one", repetitions); - ProfileVectorScaleAddWorker(one, v, one, u, "one, vector, one", repetitions); - ProfileVectorScaleAddWorker(scalar, v, one, u, "scalar, vector, one", repetitions); - ProfileVectorScaleAddWorker(scalar, math::OnesVector(), scalar, u, "scalar, ones, scalar", repetitions); - ProfileVectorScaleAddWorker(one, v, scalar, u, "one, vector, scalar", repetitions); - ProfileVectorScaleAddWorker(scalar, v, scalar, u, "scalar, vector, scalar", repetitions); -} - -template -void ProfileVectorInner(size_t size, size_t repetitions, std::string seed) -{ - auto engine = utilities::GetRandomEngine(seed); - std::uniform_real_distribution uniform(-1, 1); - auto generator = [&]() { return uniform(engine); }; - - math::RowVector u(size); - u.Generate(generator); - - math::ColumnVector v(size); - v.Generate(generator); - - ElementType result; - double native = GetTime([&]() { 
math::Internal::VectorOperations::InnerProduct(u, v, result); }, repetitions); - math::Blas::SetNumThreads(1); - double singleBlas = GetTime([&]() { math::Internal::VectorOperations::InnerProduct(u, v, result); }, repetitions); - math::Blas::SetNumThreads(0); - double multiBlas = GetTime([&]() { math::Internal::VectorOperations::InnerProduct(u, v, result); }, repetitions); - - std::string type = std::string("<") + typeid(ElementType).name() + ">"; - std::string vector = "Vector" + type + "[" + std::to_string(size) + "]"; - PrintLine("Dot(" + vector + ", " + vector + ")", native, singleBlas, multiBlas); -} - -template -void ProfileVectorOuter(size_t size, size_t repetitions, std::string seed) -{ - auto engine = utilities::GetRandomEngine(seed); - std::uniform_real_distribution uniform(-1, 1); - auto generator = [&]() { return uniform(engine); }; - - math::ColumnVector u(size); - u.Generate(generator); - - math::RowVector v(size); - v.Generate(generator); - - math::Matrix S(size, size); - - double native = GetTime([&]() { math::Internal::VectorOperations::OuterProduct(u, v, S); }, repetitions); - math::Blas::SetNumThreads(1); - double singleBlas = GetTime([&]() { math::Internal::VectorOperations::OuterProduct(u, v, S); }, repetitions); - math::Blas::SetNumThreads(0); - double multiBlas = GetTime([&]() { math::Internal::VectorOperations::OuterProduct(u, v, S); }, repetitions); - - std::string type = std::string("<") + typeid(ElementType).name() + ">"; - std::string vector = "Vector" + type + "[" + std::to_string(size) + "]"; - std::string functionName = "OuterProduct(" + vector + ", " + vector + ")"; - PrintLine(functionName, native, singleBlas, multiBlas); -} - -template -void ProfileMatrixVectorMultiplyScaleAddUpdate(size_t numRows, size_t numColumns, size_t repetitions, std::string seed) -{ - auto engine = utilities::GetRandomEngine(seed); - std::uniform_real_distribution uniform(-1, 1); - auto generator = [&]() { return uniform(engine); }; - - math::Matrix M(numRows, numColumns); - M.Generate(generator); - - math::ColumnVector v(numColumns); - v.Generate(generator); - - math::ColumnVector u(numRows); - u.Generate(generator); - - auto s = generator(); - auto t = generator(); - - double native = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(s, M, v, t, u); }, repetitions); - math::Blas::SetNumThreads(1); - double singleBlas = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(s, M, v, t, u); }, repetitions); - math::Blas::SetNumThreads(0); - double multiBlas = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(s, M, v, t, u); }, repetitions); - - std::string type = std::string("<") + typeid(ElementType).name() + ">"; - std::string vector1 = "Vector" + type + "[" + std::to_string(numColumns) + "]"; - std::string vector2 = "Vector" + type + "[" + std::to_string(numRows) + "]"; - std::string matrix = "Matrix" + type + "[" + std::to_string(numRows) + ", " + std::to_string(numColumns) + "]"; - std::string functionName = "MultiplyScaleAddUpdate(scalar, " + matrix + ", " + vector1 + ", scalar, " + vector2 + ")"; - PrintLine(functionName, native, singleBlas, multiBlas); -} - -template -void ProfileMatrixMatrixMultiplyScaleAddUpdate(size_t numRows, size_t numColumns, size_t numColumns2, size_t repetitions, std::string seed) -{ - auto engine = utilities::GetRandomEngine(seed); - std::uniform_real_distribution uniform(-1, 1); - auto generator = [&]() { return uniform(engine); }; - - math::Matrix M(numRows, numColumns); - 
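Each Profile* function times three configurations of the same call: the native implementation, BLAS pinned to one thread via SetNumThreads(1), and BLAS with SetNumThreads(0) (multi-threaded, per the multiBlas label); PrintLine then reports both BLAS times as ratios against native. The timing discipline itself is warm-up-then-measure; a standalone analogue (sketch):

```cpp
#include <chrono>
#include <cstddef>

// Run untimed warm-up calls, then time `repetitions` calls in milliseconds.
template <typename Function>
double TimeMilliseconds(Function function, size_t repetitions)
{
    function(); // warm-up: exclude one-time allocation and cache effects
    function();

    auto start = std::chrono::high_resolution_clock::now();
    for (size_t t = 0; t < repetitions; ++t)
    {
        function();
    }
    auto finish = std::chrono::high_resolution_clock::now();
    return std::chrono::duration<double, std::milli>(finish - start).count();
}
```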
M.Generate(generator); - - math::Matrix N(numColumns, numColumns2); - N.Generate(generator); - - math::Matrix T(numRows, numColumns2); - T.Generate(generator); - - auto a = generator(); - auto b = generator(); - - double native = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(a, M, N, b, T); }, repetitions); - math::Blas::SetNumThreads(1); - double singleBlas = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(a, M, N, b, T); }, repetitions); - math::Blas::SetNumThreads(0); - double multiBlas = GetTime([&]() { math::Internal::MatrixOperations::MultiplyScaleAddUpdate(a, M, N, b, T); }, repetitions); - - std::string type = std::string("<") + typeid(ElementType).name() + ">"; - std::string matrix1 = "Matrix" + type + "[" + std::to_string(numRows) + ", " + std::to_string(numColumns) + "]"; - std::string matrix2 = "Matrix" + type + "[" + std::to_string(numColumns) + ", " + std::to_string(numColumns2) + "]"; - std::string matrix3 = "Matrix" + type + "[" + std::to_string(numRows) + ", " + std::to_string(numColumns2) + "]"; - std::string functionName = "MultiplyScaleAddUpdate(scalar, " + matrix1 + ", " + matrix2 + ", scalar, " + matrix3 + ")"; - PrintLine(functionName, native, singleBlas, multiBlas); -} diff --git a/libraries/model/CMakeLists.txt b/libraries/model/CMakeLists.txt index 82f161590..7597a6a7d 100644 --- a/libraries/model/CMakeLists.txt +++ b/libraries/model/CMakeLists.txt @@ -56,25 +56,6 @@ set(include include/SpliceNode.h ) -set(tcc - tcc/InputNode.tcc - tcc/InputPort.tcc - tcc/IRCompiledMap.tcc - tcc/IRMapCompiler.tcc - tcc/Map.tcc - tcc/MapCompiler.tcc - tcc/Model.tcc - tcc/ModelBuilder.tcc - tcc/ModelTransformer.tcc - tcc/NodeMap.tcc - tcc/OutputNode.tcc - tcc/OutputPort.tcc - tcc/Port.tcc - tcc/PortElements.tcc - tcc/SliceNode.tcc - tcc/SpliceNode.tcc -) - set(doc doc/CompileAsFunction.md doc/Metadata.md @@ -94,22 +75,17 @@ set(optimizer_include optimizer/include/OptimizationPassRegistry.h ) -set(optimizer_tcc -) - set(optimizer_doc ) source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) source_group("doc" FILES ${doc}) source_group("optimizer\\include" FILES ${optimizer_include}) source_group("optimizer\\src" FILES ${optimizer_src}) -source_group("optimizer\\tcc" FILES ${optimizer_tcc}) source_group("optimizer\\doc" FILES ${optimizer_doc}) -add_library(${library_name} ${src} ${include} ${tcc} ${doc} ${optimizer_src} ${optimizer_include} ${optimizer_tcc} ${optimizer_doc}) +add_library(${library_name} ${src} ${include} ${doc} ${optimizer_src} ${optimizer_include} ${optimizer_doc}) target_include_directories(${library_name} PRIVATE include optimizer/include ${ELL_LIBRARIES_DIR}) target_link_libraries(${library_name} utilities data emitters) @@ -140,9 +116,6 @@ set(test_include test/include/PortElements_test.h ) -set(test_tcc -) - source_group("src" FILES ${test_src}) source_group("include" FILES ${test_include}) @@ -178,17 +151,10 @@ set(compiler_test_include test/include/PerformanceCountersTest.h ) -set(compiler_test_tcc - test/tcc/CompilableNodesTest.tcc - test/tcc/CompilerTest.tcc - test/tcc/ModelMaker.tcc -) - source_group("src" FILES ${compiler_test_src}) source_group("include" FILES ${compiler_test_include}) -source_group("tcc" FILES ${compiler_test_tcc}) -add_executable(${compiler_test_name} ${compiler_test_src} ${compiler_test_include} ${compiler_test_tcc}) +add_executable(${compiler_test_name} ${compiler_test_src} ${compiler_test_include}) 
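The CMake hunks above and the header hunks that follow show the mechanical shape of this change: every set(tcc ...) list and "tcc" source group is deleted, and each header's trailing #include of its .tcc file is replaced by the same template definitions inlined under a pragma region. Schematically, for a hypothetical Foo.h (not a file in this diff):

```cpp
// Before: Foo.h ended with
//     #include "../tcc/Foo.tcc"
//
// After: the .tcc contents move into Foo.h itself:
#pragma region implementation

namespace ell
{
    // ... template member definitions formerly in Foo.tcc ...
}

#pragma endregion implementation
```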
 target_include_directories(${compiler_test_name} PRIVATE test/include ${ELL_LIBRARIES_DIR})
 target_link_libraries(${compiler_test_name} common model nodes passes testing model_testing utilities)
 copy_shared_libraries(${compiler_test_name})
diff --git a/libraries/model/include/IRCompiledMap.h b/libraries/model/include/IRCompiledMap.h
index d0389ffff..c65fb13ef 100644
--- a/libraries/model/include/IRCompiledMap.h
+++ b/libraries/model/include/IRCompiledMap.h
@@ -238,4 +238,90 @@ namespace model
 } // namespace model
 } // namespace ell
-#include "../tcc/IRCompiledMap.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace model
+{
+    template <typename InputType>
+    void IRCompiledMap::SetComputeFunctionForInputType() const
+    {
+        if (!_computeFunctionDefined)
+        {
+            _computeFunctionDefined = true;
+            auto outputSize = GetOutput(0).Size();
+            auto functionPointer = _executionEngine->ResolveFunctionAddress(_functionName);
+            ComputeFunction<InputType> computeFunction;
+            switch (GetOutput(0).GetPortType()) // Switch on output type
+            {
+            case model::Port::PortType::boolean:
+            {
+                std::get<std::vector<bool>>(_cachedOutput).resize(outputSize);
+                auto fn = reinterpret_cast<void (*)(void*, const InputType*, bool*)>(functionPointer);
+                computeFunction = [this, fn](void* context, const InputType* input) {
+                    fn(context, input, (bool*)std::get<std::vector<bool>>(_cachedOutput).data());
+                };
+            }
+            break;
+
+            case model::Port::PortType::integer:
+            {
+                std::get<std::vector<int>>(_cachedOutput).resize(outputSize);
+                auto fn = reinterpret_cast<void (*)(void*, const InputType*, int*)>(functionPointer);
+                computeFunction = [this, fn](void* context, const InputType* input) {
+                    fn(context, input, std::get<std::vector<int>>(_cachedOutput).data());
+                };
+            }
+            break;
+
+            case model::Port::PortType::bigInt:
+            {
+                std::get<std::vector<int64_t>>(_cachedOutput).resize(outputSize);
+                auto fn = reinterpret_cast<void (*)(void*, const InputType*, int64_t*)>(functionPointer);
+                computeFunction = [this, fn](void* context, const InputType* input) {
+                    fn(context, input, std::get<std::vector<int64_t>>(_cachedOutput).data());
+                };
+            }
+            break;
+
+            case model::Port::PortType::smallReal:
+            {
+                std::get<std::vector<float>>(_cachedOutput).resize(outputSize);
+                auto fn = reinterpret_cast<void (*)(void*, const InputType*, float*)>(functionPointer);
+                computeFunction = [this, fn](void* context, const InputType* input) {
+                    fn(context, input, std::get<std::vector<float>>(_cachedOutput).data());
+                };
+            }
+            break;
+
+            case model::Port::PortType::real:
+            {
+                std::get<std::vector<double>>(_cachedOutput).resize(outputSize);
+                auto fn = reinterpret_cast<void (*)(void*, const InputType*, double*)>(functionPointer);
+                computeFunction = [this, fn](void* context, const InputType* input) {
+                    fn(context, input, std::get<std::vector<double>>(_cachedOutput).data());
+                };
+            }
+            break;
+
+            default:
+                throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
+            }
+
+            std::get<ComputeFunction<InputType>>(_computeInputFunction) = computeFunction;
+        }
+    }
+
+    template <typename ElementType>
+    ElementType* IRCompiledMap::GetGlobalValuePointer(const std::string& name)
+    {
+        auto& jitter = GetJitter();
+        auto address = jitter.GetGlobalValueAddress(name);
+        return reinterpret_cast<ElementType*>(address);
+    }
+
+} // namespace model
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/model/include/IRMapCompiler.h b/libraries/model/include/IRMapCompiler.h
index af2a97d33..0447e4bf7 100644
--- a/libraries/model/include/IRMapCompiler.h
+++ b/libraries/model/include/IRMapCompiler.h
@@ -17,8 +17,8 @@ #include -#include #include +#include #include
@@ -194,4 +194,22 @@ namespace model
 } // namespace model
 } // namespace ell
-#include "../tcc/IRMapCompiler.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace model
+{
+    template <typename ValueType>
+    emitters::LLVMValue IRMapCompiler::EnsurePortEmitted(const OutputPortBase& port, ValueType initialValue)
+    {
+        using namespace logging;
+
+        Log() << "EnsurePortEmitted 
called for port " << port.GetRuntimeTypeName() << EOL; + auto pVar = GetOrAllocatePortVariable(port, initialValue); + return GetModule().EnsureEmitted(*pVar); + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/InputNode.h b/libraries/model/include/InputNode.h index 25f30ca89..c418e0cbf 100644 --- a/libraries/model/include/InputNode.h +++ b/libraries/model/include/InputNode.h @@ -91,4 +91,112 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/InputNode.tcc" +#pragma region implementation + +#include "../include/ModelTransformer.h" + +#include + +namespace ell +{ +namespace model +{ + template + InputNode::InputNode() : + InputNodeBase(_output), + _output(this, defaultOutputPortName, 0) + { + SetShape(MemoryShape{ 0 }); + } + + template + InputNode::InputNode(size_t size) : + InputNodeBase(_output), + _output(this, defaultOutputPortName, size) + { + SetShape(MemoryShape{ static_cast(size) }); + } + + template + InputNode::InputNode(MemoryShape shape) : + InputNodeBase(_output), + _output(this, defaultOutputPortName, PortMemoryLayout{ shape }) + { + SetShape(shape); + } + + template + InputNode::InputNode(const PortMemoryLayout& layout) : + InputNodeBase(_output), + _output(this, defaultOutputPortName, layout) + { + SetMemoryLayout(layout); + } + + template + void InputNode::SetInput(ValueType inputValue) + { + SetInput(std::vector{ inputValue }); + } + + template + void InputNode::SetInput(std::vector inputValues) + { + if (_output.Size() != inputValues.size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + ell::utilities::FormatString("InputNode output size %zu doesn't match input size %zu", _output.Size(), inputValues.size())); + } + _inputValues = inputValues; + } + + template + void InputNode::Compute() const + { + _output.SetOutput(_inputValues); + } + + template + void InputNode::Copy(ModelTransformer& transformer) const + { + auto newNode = transformer.AddNode>(GetShape()); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void InputNode::Compile(IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + // Input node is typically set up during pass1. 
By default, no further work needed + } + + template + void InputNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver["layout"] << _output.GetMemoryLayout(); + } + + template + void InputNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + + int size; + archiver.OptionalProperty("size", 0) >> size; + std::vector shapeVector; + archiver.OptionalProperty("shape", std::vector{ size }) >> shapeVector; + if (archiver.HasNextPropertyName("layout")) + { + PortMemoryLayout layout; + archiver["layout"] >> layout; + SetShape(layout.GetActiveSize()); + } + else + { + SetShape({ shapeVector }); + } + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/InputPort.h b/libraries/model/include/InputPort.h index 6c777fdd1..81de59b85 100644 --- a/libraries/model/include/InputPort.h +++ b/libraries/model/include/InputPort.h @@ -162,4 +162,123 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/InputPort.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + // + // InputPortBase + // + template + InputPortBase::InputPortBase(const Node* owningNode, const OutputPort& input, const std::string& name) : + Port(owningNode, name, Port::GetPortType()), + _referencedPort(&input) + { + } + + // + // InputPort + // + template + InputPort::InputPort() : + InputPortBase(Port::GetPortType()) + { + } + + template + InputPort::InputPort(const Node* owningNode, const OutputPort& input, const std::string& name) : + InputPortBase(owningNode, input, name) + { + } + + template + InputPort& InputPort::operator=(const InputPort& other) + { + _referencedPort = other._referencedPort; + return *this; + } + + template + std::vector InputPort::GetValue() const + { + if (!IsValid()) + { + return {}; + } + + auto result = GetReferencedPort().GetOutput(); + + if (Size() != result.size()) + { + throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState); + } + return result; + } + + template + ValueType InputPort::GetValue(size_t index) const + { + return GetReferencedPort().GetOutput(index); + } + + template + ValueType InputPort::operator[](size_t index) const + { + return GetValue(index); + } + + template + PortElements InputPort::GetPortElements() const + { + if (!IsValid()) + { + return {}; + } + + return PortElements{ GetReferencedPort() }; + } + + template + const OutputPort& InputPort::GetReferencedPort() const + { + if (!IsValid()) + { + throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Error: empty input port."); + } + + return static_cast&>(*_referencedPort); + } + + template + void InputPort::WriteToArchive(utilities::Archiver& archiver) const + { + Port::WriteToArchive(archiver); + auto portElements = PortElements{ GetReferencedPort() }; + archiver["input"] << portElements; + } + + template + void InputPort::ReadFromArchive(utilities::Unarchiver& archiver) + { + Port::ReadFromArchive(archiver); + PortElements input; + archiver["input"] >> input; + if (!input.IsFullPortOutput()) + { + // Back-compat: if this port has a non-simple PortElements, add nodes to the model as needed to simplify. 
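+            // (Illustrative, not normative: an archived input that referenced pieces of
+            // several output ports cannot be represented by the single _referencedPort
+            // pointer this class now keeps, so AddRoutingNodes below materializes routing
+            // nodes in the model and the port they expose is referenced instead.)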
+ auto& context = archiver.GetContext(); + ModelSerializationContext& modelContext = dynamic_cast(context); + const auto& newInput = modelContext.GetModel()->AddRoutingNodes(input); + _referencedPort = &newInput; + } + else + { + _referencedPort = input.GetRanges()[0].ReferencedPort(); + } + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/Map.h b/libraries/model/include/Map.h index 009f4a1b6..4855f4f77 100644 --- a/libraries/model/include/Map.h +++ b/libraries/model/include/Map.h @@ -455,4 +455,209 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/Map.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + namespace MapImpl + { + template + T FromDouble(double x) + { + return static_cast(x); + } + + template <> + inline bool FromDouble(double x) + { + return x != 0; + } + } // namespace MapImpl + + template , utilities::IsFundamental> + std::vector Map::Compute(const std::vector& inputValues) const + { + SetInputValue(0, inputValues); + return ComputeOutput(GetOutput(0)); + } + + template , data::IsDataVector> + OutputVectorType Map::Compute(const InputVectorType& inputValues) const + { + SetInputValue(GetInput(0), inputValues); + return ComputeOutput(GetOutput(0)); + } + + // + // SetInput + // + template > + void Map::SetInputValue(InputNodeBase* node, const DataVectorType& inputValues) const + { + auto inputSize = node->GetOutputPort().Size(); + auto inputArray = inputValues.ToArray(inputSize); + std::vector array(inputSize); + std::transform(inputArray.begin(), inputArray.end(), array.begin(), [](auto x) { return MapImpl::FromDouble(x); }); + auto typedNode = static_cast*>(node); + + SetNodeInput(typedNode, array); + } + + template > + void Map::SetInputValue(InputNodeBase* inputNode, const DataVectorType& inputValues) const + { + switch (inputNode->GetOutputPort().GetType()) + { + case Port::PortType::none: + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); + break; + case Port::PortType::smallReal: + SetInputValue(inputNode, inputValues); + break; + case Port::PortType::real: + SetInputValue(inputNode, inputValues); + break; + case Port::PortType::integer: + SetInputValue(inputNode, inputValues); + break; + case Port::PortType::bigInt: + SetInputValue(inputNode, inputValues); + break; + case Port::PortType::categorical: + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); + break; + case Port::PortType::boolean: + SetInputValue(inputNode, inputValues); + break; + default: + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); + } + } + + // By name + template + void Map::SetInputValue(const std::string& inputName, const std::vector& inputValues) const + { + auto node = dynamic_cast*>(GetInput(inputName)); + if (node == nullptr) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + + SetNodeInput(node, inputValues); + } + + template > + void Map::SetInputValue(const std::string& inputName, const DataVectorType& inputValues) const + { + auto node = GetInput(inputName); + SetInputValue(node, inputValues); + } + + // By index + template + void Map::SetInputValue(int index, const std::vector& inputValues) const + { + auto node = dynamic_cast*>(GetInput(index)); + if (node == nullptr) + { + std::string nodeType = "missing InputNode<"; + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, nodeType + 
utilities::TypeName::GetName() + ">"); + } + + SetNodeInput(node, inputValues); + } + + template > + void Map::SetInputValue(int index, const DataVectorType& inputValues) const + { + auto node = GetInput(index); + SetInputValue(node, inputValues); + } + + // + // ComputeOutput + // + + template > + OutputDataVectorType Map::ComputeOutput(const PortElementsBase& elements) const + { + auto resultVector = ComputeOutput(elements); + auto resultVectorIterator = data::MakeVectorIndexValueIterator(resultVector); + return { resultVectorIterator }; + } + + template > + DataVectorType Map::ComputeOutput(const PortElementsBase& elements) const + { + switch (elements.GetPortType()) + { + case Port::PortType::none: + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); + break; + case Port::PortType::smallReal: + return ComputeOutput(elements); + break; + case Port::PortType::real: + return ComputeOutput(elements); + break; + case Port::PortType::integer: + return ComputeOutput(elements); + break; + case Port::PortType::bigInt: + return ComputeOutput(elements); + break; + case Port::PortType::categorical: + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); + break; + case Port::PortType::boolean: + return ComputeOutput(elements); + break; + default: + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); + } + } + + // By index + template > + std::vector Map::ComputeOutput(int index) const + { + return ComputeOutput(GetOutput(index)); + } + + template > + DataVectorType Map::ComputeOutput(int index) const + { + return ComputeOutput(GetOutput(index)); + } + + // By name + template > + std::vector Map::ComputeOutput(const std::string& outputName) const + { + return ComputeOutput(GetOutput(outputName)); + } + + template > + DataVectorType Map::ComputeOutput(const std::string& outputName) const + { + return ComputeOutput(GetOutput(outputName)); + } + + template + PortElements Map::GetOutputElements(size_t outputIndex) const + { + return PortElements(GetOutput(outputIndex)); + } + + template + PortElements Map::GetOutputElements(std::string outputName) const + { + return PortElements(GetOutput(outputName)); + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/MapCompiler.h b/libraries/model/include/MapCompiler.h index 39b0038b2..37553531f 100644 --- a/libraries/model/include/MapCompiler.h +++ b/libraries/model/include/MapCompiler.h @@ -131,4 +131,46 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/MapCompiler.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + template + emitters::Variable* MapCompiler::AllocatePortVariable(const OutputPortBase& port, ValueType initialValue) + { + auto pModuleEmitter = GetModuleEmitter(); + assert(port.Size() != 0); + + emitters::VariableType varType = PortTypeToVariableType(port.GetType()); + emitters::Variable* pVar = nullptr; + if (initialValue == 0) + { + pVar = pModuleEmitter->Variables().AddVectorVariable(emitters::VariableScope::global, varType, port.Size()); + } + else + { + pVar = pModuleEmitter->Variables().AddVectorVariable(emitters::VariableScope::global, port.Size(), initialValue); + } + + pModuleEmitter->AllocateVariable(*pVar); + SetVariableForPort(port, pVar); + return pVar; + } + + template + emitters::Variable* MapCompiler::GetOrAllocatePortVariable(const OutputPortBase& port, ValueType initialValue) + { + emitters::Variable* pVar = 
GetVariableForPort(port); + if (pVar == nullptr) + { + pVar = AllocatePortVariable(port); + } + assert(pVar != nullptr); + return pVar; + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/Model.h b/libraries/model/include/Model.h index 72b4b59a8..6a733ba8d 100644 --- a/libraries/model/include/Model.h +++ b/libraries/model/include/Model.h @@ -402,4 +402,212 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/Model.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + namespace detail + { + class ModelNodeRouter + { + public: + template + static T&& ConvertPortElementsArgImpl(Model& model, T&& arg, std::false_type, bool) + { + // pass through + return std::forward(arg); + } + + template + static auto& ConvertPortElementsArgImpl(Model& model, T&& arg, std::true_type, std::false_type) + { + // should not use initializer list + return model.AddRoutingNodes(std::forward(arg)); + } + + template + static auto ConvertPortElementsArgImpl(Model& model, T&& arg, std::true_type, std::true_type) + { + // should use initializer list + return model.AddRoutingNodes({ std::forward(arg) }); + } + + template + static decltype(auto) ConvertPortElementsArg(Model& model, T&& arg) + { + constexpr auto noPassThrough = + std::is_base_of>{} || + std::is_base_of>{} || + std::is_base_of>{}; + + constexpr auto shouldUseInitList = !std::is_base_of>{}; + + return ConvertPortElementsArgImpl( + model, + std::forward(arg), + std::integral_constant{}, + std::integral_constant{}); + } + }; + } // namespace detail + + // + // Factory method for creating nodes + // + template + NodeType* Model::AddNode(Args&&... args) + { + auto node = std::make_unique(detail::ModelNodeRouter::ConvertPortElementsArg(*this, std::forward(args))...); + auto result = node.get(); + AddExistingNode(std::move(node)); + return result; + } + + template + const OutputPort& Model::AddRoutingNodes(const PortElements& elements) + { + const OutputPortBase& port = AddRoutingNodes(static_cast(elements)); + return static_cast&>(port); + } + + // + // Compute output value + // + template + std::vector Model::ComputeOutput(const OutputPort& outputPort) const + { + auto compute = [](const Node& node) { node.Compute(); }; + VisitSubmodel({ &outputPort }, compute); + return outputPort.GetOutput(); + } + + template + std::vector Model::ComputeOutput(const PortElements& elements) const + { + // get set of ports to make sure we visit + std::unordered_set usedPorts; + for (const auto& range : elements.GetRanges()) + { + usedPorts.insert(range.ReferencedPort()); + } + + auto ports = std::vector(usedPorts.begin(), usedPorts.end()); + VisitSubmodel(ports, [](const Node& node) { + node.Compute(); + }); + + // Now construct the output + auto numElements = elements.Size(); + std::vector result(numElements); + for (size_t index = 0; index < numElements; ++index) + { + auto element = elements.GetElement(index); + auto port = element.ReferencedPort(); + auto portOutput = port->GetOutput()[element.GetIndex()]; + result[index] = portOutput; + } + return result; + } + + template + std::vector Model::ComputeOutput(const PortElementsBase& elements) const + { + auto typedElements = PortElements(elements); + return ComputeOutput(typedElements); + } + + // + // Get nodes by type + // + template + std::vector Model::GetNodesByType() const + { + std::vector result; + auto findNodes = [&result](const Node& node) { + auto nodePtr = dynamic_cast(&node); + if (nodePtr != 
nullptr) + { + result.push_back(nodePtr); + } + }; + Visit(findNodes); + return result; + } + + template + std::vector Model::GetNodesByType() + { + std::vector result; + auto findNodes = [&result](const Node& node) { + auto nodePtr = dynamic_cast(&node); + if (nodePtr != nullptr) + { + result.push_back(const_cast(nodePtr)); + } + }; + Visit(findNodes); + return result; + } + + // + // Visitors + // + + // Visits the entire model + template + void Model::Visit(Visitor&& visitor) const + { + std::vector emptyVec; + VisitSubmodel(emptyVec, visitor); + } + + // Visits just the parts necessary to compute output node + template + void Model::VisitSubmodel(const OutputPortBase* output, Visitor&& visitor) const + { + auto iter = GetNodeIterator(output); + VisitIteratedNodes(iter, visitor); + } + + template + void Model::VisitSubmodel(const std::vector& outputs, Visitor&& visitor) const + { + auto iter = GetNodeIterator(outputs); + VisitIteratedNodes(iter, visitor); + } + + template + void Model::VisitSubmodel(const std::vector& inputs, const std::vector& outputs, Visitor&& visitor) const + { + auto iter = GetNodeIterator(inputs, outputs); + VisitIteratedNodes(iter, visitor); + } + + // Base implementation for "Visit" methods + template + void Model::VisitIteratedNodes(NodeIterator& iter, Visitor&& visitor) const + { + while (iter.IsValid()) + { + visitor(*iter.Get()); + iter.Next(); + } + } + + // Visits the entire model in reverse + template + void Model::ReverseVisit(Visitor&& visitor) const + { + auto iter = GetReverseNodeIterator(); + while (iter.IsValid()) + { + visitor(*iter.Get()); + iter.Next(); + } + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/ModelBuilder.h b/libraries/model/include/ModelBuilder.h index c106a5d0d..95f3ba3a5 100644 --- a/libraries/model/include/ModelBuilder.h +++ b/libraries/model/include/ModelBuilder.h @@ -116,4 +116,86 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/ModelBuilder.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + namespace ModelBuilderDetail + { + // GetArgsSuffixFromVariantVector + template + ArgsTupleType GetArgsSuffixFromVariantVectorHelper(const std::vector& args, std::index_sequence) + { + return ArgsTupleType({ args[Sequence].GetValue::type>() }...); + } + + template + auto GetArgsSuffixFromVariantVector(FunctionType& function, const std::vector& args) -> utilities::TupleTailType> + { + using ArgTypes = utilities::FunctionArgTypes; + using ArgSuffixTypes = utilities::TupleTailType; + return utilities::GetTupleFromVariants(args); + } + + template + std::vector GetAddFunctionArgTypes(FunctionType& f) + { + return utilities::GetVariantsFromTupleType>>(); + } + + // CallAddNodeFunction + template + Node* CallAddNodeFunctionHelper(FunctionType& function, Model& model, const std::vector& args, std::index_sequence) + { + auto argsTuple = GetArgsSuffixFromVariantVector(function, args); + return function(model, std::get(argsTuple)...); + } + + template + Node* CallAddNodeFunction(FunctionType& function, Model& model, const std::vector& args) + { + using ArgTypes = utilities::FunctionArgTypes; + return CallAddNodeFunctionHelper(function, model, args, std::make_index_sequence::value - 1>()); + } + } // namespace ModelBuilderDetail + + // AddNode(Args...) + template + NodeType* ModelBuilder::AddNode(Model& model, Args&&... 
args) + { + return model.AddNode(args...); + } + + // + // RegisterNodeCreator + // + + // Using Model::AddNode + template + void ModelBuilder::RegisterNodeCreator() + { + auto addFunction = std::function{ [](Model& model, ArgTypes... args) { + return model.AddNode(args...); + } }; + + RegisterNodeCreator(NodeType::GetTypeName(), addFunction); + } + + // Using custom add function + template + void ModelBuilder::RegisterNodeCreator(const std::string& creatorName, FunctionType addFunction) + { + auto addNodeFunction = [addFunction](Model& model, const std::vector& args) { + return ModelBuilderDetail::CallAddNodeFunction(addFunction, model, args); + }; + + std::string key = NodeType::GetTypeName(); + _addNodeFunctions[key] = addNodeFunction; + _getNodeArgsFunctions[key] = [addFunction]() { return ModelBuilderDetail::GetAddFunctionArgTypes(addFunction); }; + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/ModelTransformer.h b/libraries/model/include/ModelTransformer.h index 5a81ac4f4..3eda7ffe3 100644 --- a/libraries/model/include/ModelTransformer.h +++ b/libraries/model/include/ModelTransformer.h @@ -445,4 +445,98 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/ModelTransformer.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + // + // ModelTransformer + // + template + const OutputPort& ModelTransformer::TransformSubmodelOnto(const Model& sourceModel, const std::vector& sourceInputs, const OutputPort& sourceOutput, Model& destModel, const std::vector& destInputs, const TransformContext& context, const NodeTransformFunction& transformFunction) + { + const auto& result = TransformSubmodelOnto(sourceModel, sourceInputs, static_cast(sourceOutput), destModel, destInputs, context, transformFunction); + return static_cast&>(result); + } + + template + const OutputPort& ModelTransformer::CopySubmodelOnto(const Model& sourceModel, const std::vector& sourceInputs, const OutputPort& sourceOutput, Model& destModel, const std::vector& destInputs, const TransformContext& context) + { + const auto& result = CopySubmodelOnto(sourceModel, sourceInputs, static_cast(sourceOutput), destModel, destInputs, context); + return static_cast&>(result); + } + + template + const OutputPort& ModelTransformer::GetCorrespondingInputs(const InputPort& port) const + { + const auto& result = GetCorrespondingInputs(static_cast(port)); + return static_cast&>(result); + } + + template + const OutputPort& ModelTransformer::GetCorrespondingOutputs(const OutputPort& port) const + { + const auto& result = GetCorrespondingOutputs(static_cast(port)); + return static_cast&>(result); + } + + template + const OutputPort& ModelTransformer::GetCorrespondingOutputs(const PortElements& elements) const + { + if (!elements.IsFullPortOutput()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "ModelTransformer::GetCorrespondingOutputs(): Invalid PortElements"); + } + const auto& result = GetCorrespondingOutputs(*elements.GetRanges()[0].ReferencedPort()); + return static_cast&>(result); + } + + template + NodeType* ModelTransformer::GetCorrespondingInputNodeAs(const NodeType* inputNode) const + { + const auto& newNodeOutputs = GetCorrespondingOutputs(inputNode->GetOutputPort()); + auto newNodeConst = newNodeOutputs.GetNode(); + auto newInputNodeConst = dynamic_cast(newNodeConst); + assert(newInputNodeConst != nullptr); + auto newInputNode = const_cast(newInputNodeConst); + return 
newInputNode; + } + + template + InputNode* ModelTransformer::GetCorrespondingInputNode(const InputNode* inputNode) const + { + return GetCorrespondingInputNodeAs(inputNode); + } + + template + NodeType* ModelTransformer::AddNode(Args&&... args) + { + auto newNode = _model.AddNode(std::forward(args)...); + _isModelCompilable &= _context.IsNodeCompilable(*newNode); + return newNode; + } + + template + void ModelTransformer::MapNodeOutput(const OutputPort& oldPort, const OutputPortBase& newPort) + { + _elementsMap.MapNodeOutput(&oldPort, &newPort); + } + + template + void ModelTransformer::MapNodeOutput(const OutputPort& oldPort, const OutputPort& newPort) + { + _elementsMap.MapNodeOutput(&oldPort, &newPort); + } + + template + void ModelTransformer::MapNodeOutput(const OutputPort& oldPort, const PortElements& newElements) + { + const auto& newPort = _model.AddRoutingNodes(newElements); + _elementsMap.MapNodeOutput(&oldPort, &newPort); + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/NodeMap.h b/libraries/model/include/NodeMap.h index e08eebc93..9f310d285 100644 --- a/libraries/model/include/NodeMap.h +++ b/libraries/model/include/NodeMap.h @@ -52,4 +52,52 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/NodeMap.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + template + T NodeMap::Get(const model::Node& node) const + { + T value = defaultValue; + auto search = _map.find(node.GetId()); + if (search != _map.end()) + { + value = search->second; + } + return value; + } + + template + void NodeMap::Set(const model::Node& node, T value) + { + _map[node.GetId()] = value; + } + + template + bool NodeMap::Contains(const model::Node& node) const + { + return (Get(node) != nullptr); + } + + template + void NodeMap::Remove(const model::Node& node) + { + auto search = _map.find(node.GetId()); + if (search != _map.end()) + { + _map.erase(search); + } + } + + template + void NodeMap::Clear() + { + _map.clear(); + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/OutputNode.h b/libraries/model/include/OutputNode.h index 52dd195ee..5eaeb3ee5 100644 --- a/libraries/model/include/OutputNode.h +++ b/libraries/model/include/OutputNode.h @@ -74,4 +74,83 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/OutputNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + template + OutputNode::OutputNode() : + OutputNodeBase(_input, _output, {}), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0) + { + SetShape({}); + } + + template + OutputNode::OutputNode(const model::OutputPort& input) : + OutputNodeBase(_input, _output, MemoryShape{ static_cast(input.Size()) }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, input.Size()) + { + SetShape(MemoryShape{ static_cast(input.Size()) }); + } + + template + OutputNode::OutputNode(const model::OutputPort& input, const MemoryShape& shape) : + OutputNodeBase(_input, _output, shape), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, input.Size()) + { + SetShape(shape); + } + + template + void OutputNode::Compute() const + { + _output.SetOutput(_input.GetValue()); + } + + template + void OutputNode::Copy(ModelTransformer& transformer) const + { + const auto& newInputs = transformer.GetCorrespondingInputs(_input); + auto newNode = 
transformer.AddNode>(newInputs, GetShape()); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void OutputNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["layout"] << _input.GetMemoryLayout(); + } + + template + void OutputNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + + int size; + archiver.OptionalProperty("size", 0) >> size; + std::vector shapeVector; + archiver.OptionalProperty("shape", std::vector{ size }) >> shapeVector; + if (archiver.HasNextPropertyName("layout")) + { + PortMemoryLayout layout; + archiver["layout"] >> layout; + SetShape(layout.GetActiveSize()); + } + else + { + SetShape({ shapeVector }); + } + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/OutputPort.h b/libraries/model/include/OutputPort.h index 413a93404..44da6c8bb 100644 --- a/libraries/model/include/OutputPort.h +++ b/libraries/model/include/OutputPort.h @@ -197,4 +197,80 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/OutputPort.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace model +{ + // + // OutputPort + // + template + OutputPort::OutputPort(const Node* node, std::string name, size_t size) : + OutputPortBase(node, name, OutputPortBase::GetPortType(), size) + { + } + + template + OutputPort::OutputPort(const Node* node, std::string name, const PortMemoryLayout& layout) : + OutputPortBase(node, name, OutputPortBase::GetPortType(), layout) + { + } + + template + ValueType OutputPort::GetOutput(size_t index) const + { + return _cachedOutput[index]; + } + + template + std::vector OutputPort::GetDoubleOutput() const + { + std::vector result(_cachedOutput.size()); + std::copy(_cachedOutput.begin(), _cachedOutput.end(), result.begin()); + return result; + } + + template + double OutputPort::GetDoubleOutput(size_t index) const + { + return static_cast(_cachedOutput[index]); + } + + template + template + void OutputPort::SetOutput(std::initializer_list&& values) const + { + this->SetOutput(std::begin(values), std::end(values)); + } + + template + template + void OutputPort::SetOutput(C&& values) const + { + this->SetOutput(std::begin(values), std::end(values)); + } + + template + template + void OutputPort::SetOutput(It begin, It end) const + { + _cachedOutput.assign(begin, end); + } + + template + void OutputPort::WriteToArchive(utilities::Archiver& archiver) const + { + OutputPortBase::WriteToArchive(archiver); + } + + template + void OutputPort::ReadFromArchive(utilities::Unarchiver& archiver) + { + OutputPortBase::ReadFromArchive(archiver); + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/Port.h b/libraries/model/include/Port.h index 6977f04a1..996b7930f 100644 --- a/libraries/model/include/Port.h +++ b/libraries/model/include/Port.h @@ -129,4 +129,52 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/Port.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + struct unknown_t + { + }; + + template <> + struct PortTypeToValueType + { + typedef unknown_t value_type; + }; + + template <> + struct PortTypeToValueType + { + typedef float value_type; + }; + + template <> + struct PortTypeToValueType + { + typedef double value_type; + }; + + 
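+    // Usage sketch, assuming the primary template is declared earlier in this
+    // header as `template <Port::PortType type> struct PortTypeToValueType;`:
+    //   using RealType = typename PortTypeToValueType<Port::PortType::real>::value_type; // double
+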
template <> + struct PortTypeToValueType + { + typedef int value_type; + }; + + template <> + struct PortTypeToValueType + { + typedef int64_t value_type; + }; + + template <> + struct PortTypeToValueType + { + typedef bool value_type; + }; +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/PortElements.h b/libraries/model/include/PortElements.h index fcd002214..354c6b5dd 100644 --- a/libraries/model/include/PortElements.h +++ b/libraries/model/include/PortElements.h @@ -664,4 +664,186 @@ struct hash }; } // namespace std -#include "../tcc/PortElements.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + // + // PortElements + // + + template + PortElements::PortElements(const OutputPort& port) : + PortElementsBase(port) + { + } + + template + PortElements::PortElements(const OutputPort& port, size_t startIndex) : + PortElementsBase(PortRange(port, startIndex)) + { + } + + template + PortElements::PortElements(const OutputPort& port, size_t startIndex, size_t numValues) : + PortElementsBase(PortRange(port, startIndex, numValues)) + { + } + + template + PortElements::PortElements(const PortElement& element) + { + AddRange(PortRange(*element.ReferencedPort(), element.GetIndex(), 1)); + } + + template + PortElements::PortElements(const std::vector>& elements) + { + for (const auto& element : elements) + { + AddRange({ element.ReferencedPort(), element.GetIndex() }); + } + } + + template + PortElements::PortElements(const std::initializer_list>& groups) + { + for (const auto& group : groups) + { + for (const auto& range : group.GetRanges()) + { + AddRange(range); + } + } + } + + template + PortElements::PortElements(const std::vector>& groups) + { + for (const auto& group : groups) + { + for (const auto& range : group.GetRanges()) + { + AddRange(range); + } + } + } + + template + PortElements::PortElements(const PortElements& elements, size_t index) : + PortElements(elements, index, 1) + { + } + + template + PortElements::PortElements(const PortElements& elements, size_t startIndex, size_t numValues) + { + if (startIndex + numValues > elements.Size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Invalid slice."); + } + + auto rangeIterator = elements.GetRanges().begin(); + auto endIterator = elements.GetRanges().end(); + // skip ranges that come before the desired elements + while (rangeIterator != endIterator && rangeIterator->Size() <= startIndex) + { + startIndex -= rangeIterator->Size(); + ++rangeIterator; + } + + // now extract portions from ranges until done + while (rangeIterator != endIterator && numValues > 0) + { + size_t numRangeValues = std::min(rangeIterator->Size() - startIndex, numValues); + AddRange({ *rangeIterator->ReferencedPort(), startIndex, numRangeValues }); + numValues -= numRangeValues; + ++rangeIterator; + startIndex = 0; // after the first time through, we'll always take the first part of a range + } + ComputeSize(); + } + + template + PortElements::PortElements(const PortElementsBase& other) + { + for (const auto& range : other.GetRanges()) + { + if (range.GetPortType() != Port::GetPortType()) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + AddRange(range); + } + } + + template + PortElement PortElements::GetElement(size_t index) const + { + auto baseElement = PortElementsBase::GetElement(index); + auto element = static_cast&>(baseElement); + return element; + } + + template + void 
PortElements<ValueType>::Append(const PortElements<ValueType>& other)
+    {
+        PortElementsBase::Append(other);
+    }
+
+    //
+    // Convenience functions
+    //
+
+    // MakePortElements
+    template <typename ValueType>
+    PortElements<ValueType> MakePortElements(const OutputPort<ValueType>& port)
+    {
+        return PortElements<ValueType>(port);
+    }
+
+    template <typename ValueType>
+    PortElements<ValueType> MakePortElements(const OutputPort<ValueType>& port, size_t startIndex)
+    {
+        return PortElements<ValueType>(port, startIndex);
+    }
+
+    template <typename ValueType>
+    PortElements<ValueType> MakePortElements(const OutputPort<ValueType>& port, size_t startIndex, size_t numValues)
+    {
+        return PortElements<ValueType>(port, startIndex, numValues);
+    }
+
+    // Concat
+    template <typename RefType, typename... Refs>
+    RefType Concat(const RefType& ref1, Refs&&... refs)
+    {
+        return RefType({ ref1, refs... });
+    }
+
+    //
+    // Proxy classes
+    //
+    template <typename ValueType>
+    PortElementsProxy PortElementsToProxy(const PortElements<ValueType>& elements)
+    {
+        PortElementsProxy proxy(elements.GetPortType());
+        for (auto r : elements.GetRanges())
+        {
+            proxy.Append(r);
+        }
+        return proxy;
+    }
+
+    template <typename ValueType>
+    PortElements<ValueType> ProxyToPortElements(const Model& model, const PortElementsProxy& proxy)
+    {
+        return PortElements<ValueType>(ProxyToPortElements(model, proxy));
+    }
+
+} // namespace model
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/model/include/SliceNode.h b/libraries/model/include/SliceNode.h
index b48d7020c..577a67236 100644
--- a/libraries/model/include/SliceNode.h
+++ b/libraries/model/include/SliceNode.h
@@ -76,4 +76,95 @@ namespace model
 } // namespace model
 } // namespace ell
-#include "../tcc/SliceNode.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace model
+{
+    template <typename ValueType>
+    SliceNode<ValueType>::SliceNode() :
+        CompilableNode({ &_input }, { &_output }),
+        _input(this, {}, defaultInputPortName),
+        _output(this, defaultOutputPortName, 0){};
+
+    template <typename ValueType>
+    SliceNode<ValueType>::SliceNode(const OutputPortBase& port, int start, int count) :
+        CompilableNode({ &_input }, { &_output }),
+        _input(this, static_cast<const OutputPort<ValueType>&>(port), defaultInputPortName),
+        _output(this, defaultOutputPortName, port.GetMemoryLayout()),
+        _largestDimensionStart(start),
+        _largestDimensionCount(count)
+    {
+        auto layout = port.GetMemoryLayout();
+        if (layout.HasPadding())
+        {
+            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "SliceNode must not have padding on its input");
+        }
+
+        auto newShape = layout.GetActiveSize();
+        newShape[0] = _largestDimensionCount;
+        _output.SetMemoryLayout({ newShape, layout.GetLogicalDimensionOrder() });
+    }
+
+    template <typename ValueType>
+    void SliceNode<ValueType>::Compute() const
+    {
+        auto input = _input.GetValue();
+        auto output = std::vector<ValueType>(input.begin() + _largestDimensionStart, input.begin() + _largestDimensionStart + _largestDimensionCount);
+        _output.SetOutput(output);
+    }
+
+    template <typename ValueType>
+    void SliceNode<ValueType>::Compile(IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
+    {
+        if (GetPortVariableType(_input) != GetPortVariableType(_output))
+        {
+            throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Input and output port types must match");
+        }
+
+        auto input = function.LocalArray(compiler.EnsurePortEmitted(_input));
+        auto output = function.LocalArray(compiler.EnsurePortEmitted(_output));
+
+        auto layout = _input.GetReferencedPort().GetMemoryLayout();
+        const auto increment = layout.GetCumulativeIncrement(0); // slowest-moving dimension
+        const auto inputOffset = static_cast<int>(_largestDimensionStart * increment);
+        const auto rangeSize = _largestDimensionCount * increment;
+        function.For(rangeSize, [=](emitters::IRFunctionEmitter& function, emitters::IRLocalScalar i) {
+            output[i] = 
input[inputOffset + i]; + }); + } + + template + void SliceNode::Copy(ModelTransformer& transformer) const + { + const auto& newInputs = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newInputs, _largestDimensionStart, _largestDimensionCount); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void SliceNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["start"] << _largestDimensionStart; + archiver["count"] << _largestDimensionCount; + archiver["layout"] << _output.GetMemoryLayout(); + } + + template + void SliceNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["start"] >> _largestDimensionStart; + archiver["count"] >> _largestDimensionCount; + PortMemoryLayout layout; + archiver["layout"] >> layout; + _output.SetMemoryLayout(layout); + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/include/SpliceNode.h b/libraries/model/include/SpliceNode.h index b7288f80c..bcf3002c9 100644 --- a/libraries/model/include/SpliceNode.h +++ b/libraries/model/include/SpliceNode.h @@ -75,4 +75,153 @@ namespace model } // namespace model } // namespace ell -#include "../tcc/SpliceNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace model +{ + template + SpliceNode::SpliceNode() : + CompilableNode({}, { &_output }), + _output(this, defaultOutputPortName, 0) + {} + + template + SpliceNode::SpliceNode(const std::vector& inputs) : + CompilableNode({}, { &_output }), + _output(this, defaultOutputPortName, ComputeOutputLayout(inputs)) + { + auto layout = _output.GetMemoryLayout(); + if (layout.HasPadding()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "SpliceNode must not have padding on its input"); + } + + // Add 1 input port per port in the input list + auto increment = layout.GetCumulativeIncrement(0); + int index = 0; + for (const auto& inputPort : inputs) + { + if (inputPort->Size() % increment != 0) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "SpliceNode input port size must be multiple of largest dimension increment"); + } + + // Create a new InputPort object + auto portName = std::string("input_") + std::to_string(index); + _inputPorts.emplace_back(std::make_unique>(this, static_cast&>(*inputPort), portName)); + + // And add it to this node + auto rawPtr = _inputPorts.back().get(); + AddInputPort(rawPtr); + ++index; + } + } + + template + PortMemoryLayout SpliceNode::ComputeOutputLayout(const std::vector& inputPorts) + { + std::vector ranges; + for (auto port : inputPorts) + { + ranges.emplace_back(*port); + } + PortElementsBase elements(ranges); + return elements.GetMemoryLayout(); + } + + template + void SpliceNode::Compute() const + { + std::vector output; + output.reserve(_output.Size()); + for (const auto& input : _inputPorts) + { + auto value = input->GetValue(); + std::copy(value.begin(), value.end(), std::back_inserter(output)); + } + _output.SetOutput(output); + } + + template + void SpliceNode::Compile(IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + llvm::Value* pOutput = compiler.EnsurePortEmitted(_output); + // check if the pOutput variable is null + function.If(ell::emitters::TypedComparison::notEquals, pOutput, 
function.NullPointer(pOutput->getType()->getPointerElementType()->getPointerTo()), [pOutput, &compiler, this](emitters::IRFunctionEmitter& function) { + if (_inputPorts.size() == 1 && _inputPorts[0]->Size() == 1) + { + llvm::Value* pVal = compiler.LoadPortElementVariable(_inputPorts[0]->GetInputElement(0)); + function.Store(pOutput, pVal); + } + else + { + int rangeStart = 0; + for (const auto& inputPort : _inputPorts) + { + const auto& referencedPort = inputPort->GetReferencedPort(); + auto input = function.LocalArray(compiler.EnsurePortEmitted(referencedPort)); + auto output = function.LocalArray(pOutput); + auto rangeSize = referencedPort.Size(); + + function.For(rangeSize, [=](emitters::IRFunctionEmitter& function, auto i) { + output[i + rangeStart] = input[i]; + }); + rangeStart += rangeSize; + } + } + }); + } + + template + void SpliceNode::Copy(ModelTransformer& transformer) const + { + std::vector newInputs; + for (const auto& inputPort : _inputPorts) + { + const auto& newPort = transformer.GetCorrespondingInputs(*inputPort); + newInputs.emplace_back(&newPort); + } + auto newNode = transformer.AddNode>(newInputs); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void SpliceNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + int numInputs = static_cast(_inputPorts.size()); + archiver["numInputs"] << numInputs; + for (int index = 0; index < numInputs; ++index) + { + archiver[std::string("input_") + std::to_string(index)] << *_inputPorts[index]; + } + } + + template + void SpliceNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + int numInputs = 0; + archiver["numInputs"] >> numInputs; + _inputPorts.clear(); + std::vector referencedPorts; + for (int index = 0; index < numInputs; ++index) + { + InputPort port; + auto portName = std::string("input_") + std::to_string(index); + archiver[portName] >> port; + const auto& referencedPort = port.GetReferencedPort(); + _inputPorts.emplace_back(std::make_unique>(this, referencedPort, portName)); + auto rawPtr = _inputPorts.back().get(); + AddInputPort(rawPtr); + referencedPorts.push_back(&(_inputPorts.back()->GetReferencedPort())); + } + + _output.SetMemoryLayout(ComputeOutputLayout(referencedPorts)); + } +} // namespace model +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/model/optimizer/include/ModelOptimizer.h b/libraries/model/optimizer/include/ModelOptimizer.h index 17a4edb6d..47e14e1c4 100644 --- a/libraries/model/optimizer/include/ModelOptimizer.h +++ b/libraries/model/optimizer/include/ModelOptimizer.h @@ -8,9 +8,9 @@ #pragma once +#include "ModelOptimizerOptions.h" #include #include -#include "ModelOptimizerOptions.h" #include #include diff --git a/libraries/model/optimizer/src/OptimizationPass.cpp b/libraries/model/optimizer/src/OptimizationPass.cpp index 826a15959..3508b4bd5 100644 --- a/libraries/model/optimizer/src/OptimizationPass.cpp +++ b/libraries/model/optimizer/src/OptimizationPass.cpp @@ -8,9 +8,9 @@ #include "OptimizationPass.h" +#include "ModelOptimizer.h" #include #include -#include "ModelOptimizer.h" #include #include diff --git a/libraries/model/tcc/IRCompiledMap.tcc b/libraries/model/tcc/IRCompiledMap.tcc deleted file mode 100644 index e08220243..000000000 --- a/libraries/model/tcc/IRCompiledMap.tcc +++ /dev/null @@ -1,91 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library 
(ELL) -// File: IRCompiledMap.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - template - void IRCompiledMap::SetComputeFunctionForInputType() const - { - if (!_computeFunctionDefined) - { - _computeFunctionDefined = true; - auto outputSize = GetOutput(0).Size(); - auto functionPointer = _executionEngine->ResolveFunctionAddress(_functionName); - ComputeFunction computeFunction; - switch (GetOutput(0).GetPortType()) // Switch on output type - { - case model::Port::PortType::boolean: - { - std::get>(_cachedOutput).resize(outputSize); - auto fn = reinterpret_cast(functionPointer); - computeFunction = [this, fn](void* context, const InputType* input) { - fn(context, input, (bool*)std::get>(_cachedOutput).data()); - }; - } - break; - - case model::Port::PortType::integer: - { - std::get>(_cachedOutput).resize(outputSize); - auto fn = reinterpret_cast(functionPointer); - computeFunction = [this, fn](void* context, const InputType* input) { - fn(context, input, std::get>(_cachedOutput).data()); - }; - } - break; - - case model::Port::PortType::bigInt: - { - std::get>(_cachedOutput).resize(outputSize); - auto fn = reinterpret_cast(functionPointer); - computeFunction = [this, fn](void* context, const InputType* input) { - fn(context, input, std::get>(_cachedOutput).data()); - }; - } - break; - - case model::Port::PortType::smallReal: - { - std::get>(_cachedOutput).resize(outputSize); - auto fn = reinterpret_cast(functionPointer); - computeFunction = [this, fn](void* context, const InputType* input) { - fn(context, input, std::get>(_cachedOutput).data()); - }; - } - break; - - case model::Port::PortType::real: - { - std::get>(_cachedOutput).resize(outputSize); - auto fn = reinterpret_cast(functionPointer); - computeFunction = [this, fn](void* context, const InputType* input) { - fn(context, input, std::get>(_cachedOutput).data()); - }; - } - break; - - default: - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); - } - - std::get>(_computeInputFunction) = computeFunction; - } - } - - template - ElementType* IRCompiledMap::GetGlobalValuePointer(const std::string& name) - { - auto& jitter = GetJitter(); - auto address = jitter.GetGlobalValueAddress(name); - return reinterpret_cast(address); - } - -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/IRMapCompiler.tcc b/libraries/model/tcc/IRMapCompiler.tcc deleted file mode 100644 index c53903b10..000000000 --- a/libraries/model/tcc/IRMapCompiler.tcc +++ /dev/null @@ -1,23 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IRMapCompiler.tcc (model) -// Authors: Umesh Madan, Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - template - emitters::LLVMValue IRMapCompiler::EnsurePortEmitted(const OutputPortBase& port, ValueType initialValue) - { - using namespace logging; - - Log() << "EnsurePortEmitted called for port " << port.GetRuntimeTypeName() << EOL; - auto pVar = GetOrAllocatePortVariable(port, initialValue); - return GetModule().EnsureEmitted(*pVar); - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/InputNode.tcc b/libraries/model/tcc/InputNode.tcc deleted file mode 100644 index 57cee7562..000000000 --- 
a/libraries/model/tcc/InputNode.tcc +++ /dev/null @@ -1,113 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: InputNode.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include "../include/ModelTransformer.h" - -#include - -namespace ell -{ -namespace model -{ - template - InputNode::InputNode() : - InputNodeBase(_output), - _output(this, defaultOutputPortName, 0) - { - SetShape(MemoryShape{ 0 }); - } - - template - InputNode::InputNode(size_t size) : - InputNodeBase(_output), - _output(this, defaultOutputPortName, size) - { - SetShape(MemoryShape{ static_cast(size) }); - } - - template - InputNode::InputNode(MemoryShape shape) : - InputNodeBase(_output), - _output(this, defaultOutputPortName, PortMemoryLayout{ shape }) - { - SetShape(shape); - } - - template - InputNode::InputNode(const PortMemoryLayout& layout) : - InputNodeBase(_output), - _output(this, defaultOutputPortName, layout) - { - SetMemoryLayout(layout); - } - - template - void InputNode::SetInput(ValueType inputValue) - { - SetInput(std::vector{ inputValue }); - } - - template - void InputNode::SetInput(std::vector inputValues) - { - if (_output.Size() != inputValues.size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - ell::utilities::FormatString("InputNode output size %zu doesn't match input size %zu", _output.Size(), inputValues.size())); - } - _inputValues = inputValues; - } - - template - void InputNode::Compute() const - { - _output.SetOutput(_inputValues); - } - - template - void InputNode::Copy(ModelTransformer& transformer) const - { - auto newNode = transformer.AddNode>(GetShape()); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void InputNode::Compile(IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - // Input node is typically set up during pass1. 
By default, no further work needed - } - - template - void InputNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver["layout"] << _output.GetMemoryLayout(); - } - - template - void InputNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - - int size; - archiver.OptionalProperty("size", 0) >> size; - std::vector shapeVector; - archiver.OptionalProperty("shape", std::vector{ size }) >> shapeVector; - if (archiver.HasNextPropertyName("layout")) - { - PortMemoryLayout layout; - archiver["layout"] >> layout; - SetShape(layout.GetActiveSize()); - } - else - { - SetShape({ shapeVector }); - } - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/InputPort.tcc b/libraries/model/tcc/InputPort.tcc deleted file mode 100644 index f58b23002..000000000 --- a/libraries/model/tcc/InputPort.tcc +++ /dev/null @@ -1,124 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: InputPort.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - // - // InputPortBase - // - template - InputPortBase::InputPortBase(const Node* owningNode, const OutputPort& input, const std::string& name) : - Port(owningNode, name, Port::GetPortType()), - _referencedPort(&input) - { - } - - // - // InputPort - // - template - InputPort::InputPort() : - InputPortBase(Port::GetPortType()) - { - } - - template - InputPort::InputPort(const Node* owningNode, const OutputPort& input, const std::string& name) : - InputPortBase(owningNode, input, name) - { - } - - template - InputPort& InputPort::operator=(const InputPort& other) - { - _referencedPort = other._referencedPort; - return *this; - } - - template - std::vector InputPort::GetValue() const - { - if (!IsValid()) - { - return {}; - } - - auto result = GetReferencedPort().GetOutput(); - - if (Size() != result.size()) - { - throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState); - } - return result; - } - - template - ValueType InputPort::GetValue(size_t index) const - { - return GetReferencedPort().GetOutput(index); - } - - template - ValueType InputPort::operator[](size_t index) const - { - return GetValue(index); - } - - template - PortElements InputPort::GetPortElements() const - { - if (!IsValid()) - { - return {}; - } - - return PortElements{ GetReferencedPort() }; - } - - template - const OutputPort& InputPort::GetReferencedPort() const - { - if (!IsValid()) - { - throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Error: empty input port."); - } - - return static_cast&>(*_referencedPort); - } - - template - void InputPort::WriteToArchive(utilities::Archiver& archiver) const - { - Port::WriteToArchive(archiver); - auto portElements = PortElements{ GetReferencedPort() }; - archiver["input"] << portElements; - } - - template - void InputPort::ReadFromArchive(utilities::Unarchiver& archiver) - { - Port::ReadFromArchive(archiver); - PortElements input; - archiver["input"] >> input; - if (!input.IsFullPortOutput()) - { - // Back-compat: if this port has a non-simple PortElements, add nodes to the model as needed to simplify. 
- auto& context = archiver.GetContext(); - ModelSerializationContext& modelContext = dynamic_cast(context); - const auto& newInput = modelContext.GetModel()->AddRoutingNodes(input); - _referencedPort = &newInput; - } - else - { - _referencedPort = input.GetRanges()[0].ReferencedPort(); - } - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/Map.tcc b/libraries/model/tcc/Map.tcc deleted file mode 100644 index 9689d0b73..000000000 --- a/libraries/model/tcc/Map.tcc +++ /dev/null @@ -1,210 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Map.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - namespace MapImpl - { - template - T FromDouble(double x) - { - return static_cast(x); - } - - template <> - inline bool FromDouble(double x) - { - return x != 0; - } - } // namespace MapImpl - - template , utilities::IsFundamental> - std::vector Map::Compute(const std::vector& inputValues) const - { - SetInputValue(0, inputValues); - return ComputeOutput(GetOutput(0)); - } - - template , data::IsDataVector> - OutputVectorType Map::Compute(const InputVectorType& inputValues) const - { - SetInputValue(GetInput(0), inputValues); - return ComputeOutput(GetOutput(0)); - } - - // - // SetInput - // - template > - void Map::SetInputValue(InputNodeBase* node, const DataVectorType& inputValues) const - { - auto inputSize = node->GetOutputPort().Size(); - auto inputArray = inputValues.ToArray(inputSize); - std::vector array(inputSize); - std::transform(inputArray.begin(), inputArray.end(), array.begin(), [](auto x) { return MapImpl::FromDouble(x); }); - auto typedNode = static_cast*>(node); - - SetNodeInput(typedNode, array); - } - - template > - void Map::SetInputValue(InputNodeBase* inputNode, const DataVectorType& inputValues) const - { - switch (inputNode->GetOutputPort().GetType()) - { - case Port::PortType::none: - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); - break; - case Port::PortType::smallReal: - SetInputValue(inputNode, inputValues); - break; - case Port::PortType::real: - SetInputValue(inputNode, inputValues); - break; - case Port::PortType::integer: - SetInputValue(inputNode, inputValues); - break; - case Port::PortType::bigInt: - SetInputValue(inputNode, inputValues); - break; - case Port::PortType::categorical: - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); - break; - case Port::PortType::boolean: - SetInputValue(inputNode, inputValues); - break; - default: - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); - } - } - - // By name - template - void Map::SetInputValue(const std::string& inputName, const std::vector& inputValues) const - { - auto node = dynamic_cast*>(GetInput(inputName)); - if (node == nullptr) - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); - } - - SetNodeInput(node, inputValues); - } - - template > - void Map::SetInputValue(const std::string& inputName, const DataVectorType& inputValues) const - { - auto node = GetInput(inputName); - SetInputValue(node, inputValues); - } - - // By index - template - void Map::SetInputValue(int index, const std::vector& inputValues) const - { - auto node = dynamic_cast*>(GetInput(index)); - if (node == nullptr) - { - 
std::string nodeType = "missing InputNode<"; - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, nodeType + utilities::TypeName::GetName() + ">"); - } - - SetNodeInput(node, inputValues); - } - - template > - void Map::SetInputValue(int index, const DataVectorType& inputValues) const - { - auto node = GetInput(index); - SetInputValue(node, inputValues); - } - - // - // ComputeOutput - // - - template > - OutputDataVectorType Map::ComputeOutput(const PortElementsBase& elements) const - { - auto resultVector = ComputeOutput(elements); - auto resultVectorIterator = data::MakeVectorIndexValueIterator(resultVector); - return { resultVectorIterator }; - } - - template > - DataVectorType Map::ComputeOutput(const PortElementsBase& elements) const - { - switch (elements.GetPortType()) - { - case Port::PortType::none: - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); - break; - case Port::PortType::smallReal: - return ComputeOutput(elements); - break; - case Port::PortType::real: - return ComputeOutput(elements); - break; - case Port::PortType::integer: - return ComputeOutput(elements); - break; - case Port::PortType::bigInt: - return ComputeOutput(elements); - break; - case Port::PortType::categorical: - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); - break; - case Port::PortType::boolean: - return ComputeOutput(elements); - break; - default: - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument); - } - } - - // By index - template > - std::vector Map::ComputeOutput(int index) const - { - return ComputeOutput(GetOutput(index)); - } - - template > - DataVectorType Map::ComputeOutput(int index) const - { - return ComputeOutput(GetOutput(index)); - } - - // By name - template > - std::vector Map::ComputeOutput(const std::string& outputName) const - { - return ComputeOutput(GetOutput(outputName)); - } - - template > - DataVectorType Map::ComputeOutput(const std::string& outputName) const - { - return ComputeOutput(GetOutput(outputName)); - } - - template - PortElements Map::GetOutputElements(size_t outputIndex) const - { - return PortElements(GetOutput(outputIndex)); - } - - template - PortElements Map::GetOutputElements(std::string outputName) const - { - return PortElements(GetOutput(outputName)); - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/MapCompiler.tcc b/libraries/model/tcc/MapCompiler.tcc deleted file mode 100644 index 01acd7db8..000000000 --- a/libraries/model/tcc/MapCompiler.tcc +++ /dev/null @@ -1,47 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MapCompiler.tcc (model) -// Authors: Umesh Madan, Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - template - emitters::Variable* MapCompiler::AllocatePortVariable(const OutputPortBase& port, ValueType initialValue) - { - auto pModuleEmitter = GetModuleEmitter(); - assert(port.Size() != 0); - - emitters::VariableType varType = PortTypeToVariableType(port.GetType()); - emitters::Variable* pVar = nullptr; - if (initialValue == 0) - { - pVar = pModuleEmitter->Variables().AddVectorVariable(emitters::VariableScope::global, varType, port.Size()); - } - else - { - pVar = pModuleEmitter->Variables().AddVectorVariable(emitters::VariableScope::global, port.Size(), 
initialValue); - } - - pModuleEmitter->AllocateVariable(*pVar); - SetVariableForPort(port, pVar); - return pVar; - } - - template - emitters::Variable* MapCompiler::GetOrAllocatePortVariable(const OutputPortBase& port, ValueType initialValue) - { - emitters::Variable* pVar = GetVariableForPort(port); - if (pVar == nullptr) - { - pVar = AllocatePortVariable(port); - } - assert(pVar != nullptr); - return pVar; - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/Model.tcc b/libraries/model/tcc/Model.tcc deleted file mode 100644 index cab7296f0..000000000 --- a/libraries/model/tcc/Model.tcc +++ /dev/null @@ -1,213 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Model.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - namespace detail - { - class ModelNodeRouter - { - public: - template - static T&& ConvertPortElementsArgImpl(Model& model, T&& arg, std::false_type, bool) - { - // pass through - return std::forward(arg); - } - - template - static auto& ConvertPortElementsArgImpl(Model& model, T&& arg, std::true_type, std::false_type) - { - // should not use initializer list - return model.AddRoutingNodes(std::forward(arg)); - } - - template - static auto ConvertPortElementsArgImpl(Model& model, T&& arg, std::true_type, std::true_type) - { - // should use initializer list - return model.AddRoutingNodes({ std::forward(arg) }); - } - - template - static decltype(auto) ConvertPortElementsArg(Model& model, T&& arg) - { - constexpr auto noPassThrough = - std::is_base_of>{} || - std::is_base_of>{} || - std::is_base_of>{}; - - constexpr auto shouldUseInitList = !std::is_base_of>{}; - - return ConvertPortElementsArgImpl( - model, - std::forward(arg), - std::integral_constant{}, - std::integral_constant{}); - } - }; - } // namespace detail - - // - // Factory method for creating nodes - // - template - NodeType* Model::AddNode(Args&&... 
args) - { - auto node = std::make_unique(detail::ModelNodeRouter::ConvertPortElementsArg(*this, std::forward(args))...); - auto result = node.get(); - AddExistingNode(std::move(node)); - return result; - } - - template - const OutputPort& Model::AddRoutingNodes(const PortElements& elements) - { - const OutputPortBase& port = AddRoutingNodes(static_cast(elements)); - return static_cast&>(port); - } - - // - // Compute output value - // - template - std::vector Model::ComputeOutput(const OutputPort& outputPort) const - { - auto compute = [](const Node& node) { node.Compute(); }; - VisitSubmodel({ &outputPort }, compute); - return outputPort.GetOutput(); - } - - template - std::vector Model::ComputeOutput(const PortElements& elements) const - { - // get set of ports to make sure we visit - std::unordered_set usedPorts; - for (const auto& range : elements.GetRanges()) - { - usedPorts.insert(range.ReferencedPort()); - } - - auto ports = std::vector(usedPorts.begin(), usedPorts.end()); - VisitSubmodel(ports, [](const Node& node) { - node.Compute(); - }); - - // Now construct the output - auto numElements = elements.Size(); - std::vector result(numElements); - for (size_t index = 0; index < numElements; ++index) - { - auto element = elements.GetElement(index); - auto port = element.ReferencedPort(); - auto portOutput = port->GetOutput()[element.GetIndex()]; - result[index] = portOutput; - } - return result; - } - - template - std::vector Model::ComputeOutput(const PortElementsBase& elements) const - { - auto typedElements = PortElements(elements); - return ComputeOutput(typedElements); - } - - // - // Get nodes by type - // - template - std::vector Model::GetNodesByType() const - { - std::vector result; - auto findNodes = [&result](const Node& node) { - auto nodePtr = dynamic_cast(&node); - if (nodePtr != nullptr) - { - result.push_back(nodePtr); - } - }; - Visit(findNodes); - return result; - } - - template - std::vector Model::GetNodesByType() - { - std::vector result; - auto findNodes = [&result](const Node& node) { - auto nodePtr = dynamic_cast(&node); - if (nodePtr != nullptr) - { - result.push_back(const_cast(nodePtr)); - } - }; - Visit(findNodes); - return result; - } - - // - // Visitors - // - - // Visits the entire model - template - void Model::Visit(Visitor&& visitor) const - { - std::vector emptyVec; - VisitSubmodel(emptyVec, visitor); - } - - // Visits just the parts necessary to compute output node - template - void Model::VisitSubmodel(const OutputPortBase* output, Visitor&& visitor) const - { - auto iter = GetNodeIterator(output); - VisitIteratedNodes(iter, visitor); - } - - template - void Model::VisitSubmodel(const std::vector& outputs, Visitor&& visitor) const - { - auto iter = GetNodeIterator(outputs); - VisitIteratedNodes(iter, visitor); - } - - template - void Model::VisitSubmodel(const std::vector& inputs, const std::vector& outputs, Visitor&& visitor) const - { - auto iter = GetNodeIterator(inputs, outputs); - VisitIteratedNodes(iter, visitor); - } - - // Base implementation for "Visit" methods - template - void Model::VisitIteratedNodes(NodeIterator& iter, Visitor&& visitor) const - { - while (iter.IsValid()) - { - visitor(*iter.Get()); - iter.Next(); - } - } - - // Visits the entire model in reverse - template - void Model::ReverseVisit(Visitor&& visitor) const - { - auto iter = GetReverseNodeIterator(); - while (iter.IsValid()) - { - visitor(*iter.Get()); - iter.Next(); - } - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/ModelBuilder.tcc 
b/libraries/model/tcc/ModelBuilder.tcc deleted file mode 100644 index 116d2d793..000000000 --- a/libraries/model/tcc/ModelBuilder.tcc +++ /dev/null @@ -1,87 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ModelBuilder.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - namespace ModelBuilderDetail - { - // GetArgsSuffixFromVariantVector - template - ArgsTupleType GetArgsSuffixFromVariantVectorHelper(const std::vector& args, std::index_sequence) - { - return ArgsTupleType({ args[Sequence].GetValue::type>() }...); - } - - template - auto GetArgsSuffixFromVariantVector(FunctionType& function, const std::vector& args) -> utilities::TupleTailType> - { - using ArgTypes = utilities::FunctionArgTypes; - using ArgSuffixTypes = utilities::TupleTailType; - return utilities::GetTupleFromVariants(args); - } - - template - std::vector GetAddFunctionArgTypes(FunctionType& f) - { - return utilities::GetVariantsFromTupleType>>(); - } - - // CallAddNodeFunction - template - Node* CallAddNodeFunctionHelper(FunctionType& function, Model& model, const std::vector& args, std::index_sequence) - { - auto argsTuple = GetArgsSuffixFromVariantVector(function, args); - return function(model, std::get(argsTuple)...); - } - - template - Node* CallAddNodeFunction(FunctionType& function, Model& model, const std::vector& args) - { - using ArgTypes = utilities::FunctionArgTypes; - return CallAddNodeFunctionHelper(function, model, args, std::make_index_sequence::value - 1>()); - } - } // namespace ModelBuilderDetail - - // AddNode(Args...) - template - NodeType* ModelBuilder::AddNode(Model& model, Args&&... args) - { - return model.AddNode(args...); - } - - // - // RegisterNodeCreator - // - - // Using Model::AddNode - template - void ModelBuilder::RegisterNodeCreator() - { - auto addFunction = std::function{ [](Model& model, ArgTypes... 
args) { - return model.AddNode(args...); - } }; - - RegisterNodeCreator(NodeType::GetTypeName(), addFunction); - } - - // Using custom add function - template - void ModelBuilder::RegisterNodeCreator(const std::string& creatorName, FunctionType addFunction) - { - auto addNodeFunction = [addFunction](Model& model, const std::vector& args) { - return ModelBuilderDetail::CallAddNodeFunction(addFunction, model, args); - }; - - std::string key = NodeType::GetTypeName(); - _addNodeFunctions[key] = addNodeFunction; - _getNodeArgsFunctions[key] = [addFunction]() { return ModelBuilderDetail::GetAddFunctionArgTypes(addFunction); }; - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/ModelTransformer.tcc b/libraries/model/tcc/ModelTransformer.tcc deleted file mode 100644 index 849cb22a4..000000000 --- a/libraries/model/tcc/ModelTransformer.tcc +++ /dev/null @@ -1,99 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ModelTransformer.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - // - // ModelTransformer - // - template - const OutputPort& ModelTransformer::TransformSubmodelOnto(const Model& sourceModel, const std::vector& sourceInputs, const OutputPort& sourceOutput, Model& destModel, const std::vector& destInputs, const TransformContext& context, const NodeTransformFunction& transformFunction) - { - const auto& result = TransformSubmodelOnto(sourceModel, sourceInputs, static_cast(sourceOutput), destModel, destInputs, context, transformFunction); - return static_cast&>(result); - } - - template - const OutputPort& ModelTransformer::CopySubmodelOnto(const Model& sourceModel, const std::vector& sourceInputs, const OutputPort& sourceOutput, Model& destModel, const std::vector& destInputs, const TransformContext& context) - { - const auto& result = CopySubmodelOnto(sourceModel, sourceInputs, static_cast(sourceOutput), destModel, destInputs, context); - return static_cast&>(result); - } - - template - const OutputPort& ModelTransformer::GetCorrespondingInputs(const InputPort& port) const - { - const auto& result = GetCorrespondingInputs(static_cast(port)); - return static_cast&>(result); - } - - template - const OutputPort& ModelTransformer::GetCorrespondingOutputs(const OutputPort& port) const - { - const auto& result = GetCorrespondingOutputs(static_cast(port)); - return static_cast&>(result); - } - - template - const OutputPort& ModelTransformer::GetCorrespondingOutputs(const PortElements& elements) const - { - if (!elements.IsFullPortOutput()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "ModelTransformer::GetCorrespondingOutputs(): Invalid PortElements"); - } - const auto& result = GetCorrespondingOutputs(*elements.GetRanges()[0].ReferencedPort()); - return static_cast&>(result); - } - - template - NodeType* ModelTransformer::GetCorrespondingInputNodeAs(const NodeType* inputNode) const - { - const auto& newNodeOutputs = GetCorrespondingOutputs(inputNode->GetOutputPort()); - auto newNodeConst = newNodeOutputs.GetNode(); - auto newInputNodeConst = dynamic_cast(newNodeConst); - assert(newInputNodeConst != nullptr); - auto newInputNode = const_cast(newInputNodeConst); - return newInputNode; - } - - template - InputNode* ModelTransformer::GetCorrespondingInputNode(const InputNode* 
inputNode) const - { - return GetCorrespondingInputNodeAs(inputNode); - } - - template - NodeType* ModelTransformer::AddNode(Args&&... args) - { - auto newNode = _model.AddNode(std::forward(args)...); - _isModelCompilable &= _context.IsNodeCompilable(*newNode); - return newNode; - } - - template - void ModelTransformer::MapNodeOutput(const OutputPort& oldPort, const OutputPortBase& newPort) - { - _elementsMap.MapNodeOutput(&oldPort, &newPort); - } - - template - void ModelTransformer::MapNodeOutput(const OutputPort& oldPort, const OutputPort& newPort) - { - _elementsMap.MapNodeOutput(&oldPort, &newPort); - } - - template - void ModelTransformer::MapNodeOutput(const OutputPort& oldPort, const PortElements& newElements) - { - const auto& newPort = _model.AddRoutingNodes(newElements); - _elementsMap.MapNodeOutput(&oldPort, &newPort); - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/NodeMap.tcc b/libraries/model/tcc/NodeMap.tcc deleted file mode 100644 index 7a7ac4a6e..000000000 --- a/libraries/model/tcc/NodeMap.tcc +++ /dev/null @@ -1,53 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: NodeMap.tcc (model) -// Authors: Umesh Madan -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - template - T NodeMap::Get(const model::Node& node) const - { - T value = defaultValue; - auto search = _map.find(node.GetId()); - if (search != _map.end()) - { - value = search->second; - } - return value; - } - - template - void NodeMap::Set(const model::Node& node, T value) - { - _map[node.GetId()] = value; - } - - template - bool NodeMap::Contains(const model::Node& node) const - { - return (Get(node) != nullptr); - } - - template - void NodeMap::Remove(const model::Node& node) - { - auto search = _map.find(node.GetId()); - if (search != _map.end()) - { - _map.erase(search); - } - } - - template - void NodeMap::Clear() - { - _map.clear(); - } -} // namespace model -} // namespace ell \ No newline at end of file diff --git a/libraries/model/tcc/OutputNode.tcc b/libraries/model/tcc/OutputNode.tcc deleted file mode 100644 index da1d862ae..000000000 --- a/libraries/model/tcc/OutputNode.tcc +++ /dev/null @@ -1,84 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: OutputNode.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - template - OutputNode::OutputNode() : - OutputNodeBase(_input, _output, {}), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0) - { - SetShape({}); - } - - template - OutputNode::OutputNode(const model::OutputPort& input) : - OutputNodeBase(_input, _output, MemoryShape{ static_cast(input.Size()) }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, input.Size()) - { - SetShape(MemoryShape{ static_cast(input.Size()) }); - } - - template - OutputNode::OutputNode(const model::OutputPort& input, const MemoryShape& shape) : - OutputNodeBase(_input, _output, shape), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, input.Size()) - { - SetShape(shape); - } - - template - void OutputNode::Compute() const - { - 
_output.SetOutput(_input.GetValue()); - } - - template - void OutputNode::Copy(ModelTransformer& transformer) const - { - const auto& newInputs = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newInputs, GetShape()); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void OutputNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["layout"] << _input.GetMemoryLayout(); - } - - template - void OutputNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - - int size; - archiver.OptionalProperty("size", 0) >> size; - std::vector shapeVector; - archiver.OptionalProperty("shape", std::vector{ size }) >> shapeVector; - if (archiver.HasNextPropertyName("layout")) - { - PortMemoryLayout layout; - archiver["layout"] >> layout; - SetShape(layout.GetActiveSize()); - } - else - { - SetShape({ shapeVector }); - } - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/OutputPort.tcc b/libraries/model/tcc/OutputPort.tcc deleted file mode 100644 index 0fa060c07..000000000 --- a/libraries/model/tcc/OutputPort.tcc +++ /dev/null @@ -1,81 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: OutputPort.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - // - // OutputPort - // - template - OutputPort::OutputPort(const Node* node, std::string name, size_t size) : - OutputPortBase(node, name, OutputPortBase::GetPortType(), size) - { - } - - template - OutputPort::OutputPort(const Node* node, std::string name, const PortMemoryLayout& layout) : - OutputPortBase(node, name, OutputPortBase::GetPortType(), layout) - { - } - - template - ValueType OutputPort::GetOutput(size_t index) const - { - return _cachedOutput[index]; - } - - template - std::vector OutputPort::GetDoubleOutput() const - { - std::vector result(_cachedOutput.size()); - std::copy(_cachedOutput.begin(), _cachedOutput.end(), result.begin()); - return result; - } - - template - double OutputPort::GetDoubleOutput(size_t index) const - { - return static_cast(_cachedOutput[index]); - } - - template - template - void OutputPort::SetOutput(std::initializer_list&& values) const - { - this->SetOutput(std::begin(values), std::end(values)); - } - - template - template - void OutputPort::SetOutput(C&& values) const - { - this->SetOutput(std::begin(values), std::end(values)); - } - - template - template - void OutputPort::SetOutput(It begin, It end) const - { - _cachedOutput.assign(begin, end); - } - - template - void OutputPort::WriteToArchive(utilities::Archiver& archiver) const - { - OutputPortBase::WriteToArchive(archiver); - } - - template - void OutputPort::ReadFromArchive(utilities::Unarchiver& archiver) - { - OutputPortBase::ReadFromArchive(archiver); - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/Port.tcc b/libraries/model/tcc/Port.tcc deleted file mode 100644 index 685fc416c..000000000 --- a/libraries/model/tcc/Port.tcc +++ /dev/null @@ -1,53 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Port.h 
(model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - struct unknown_t - { - }; - - template <> - struct PortTypeToValueType - { - typedef unknown_t value_type; - }; - - template <> - struct PortTypeToValueType - { - typedef float value_type; - }; - - template <> - struct PortTypeToValueType - { - typedef double value_type; - }; - - template <> - struct PortTypeToValueType - { - typedef int value_type; - }; - - template <> - struct PortTypeToValueType - { - typedef int64_t value_type; - }; - - template <> - struct PortTypeToValueType - { - typedef bool value_type; - }; -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/PortElements.tcc b/libraries/model/tcc/PortElements.tcc deleted file mode 100644 index 6423b03b2..000000000 --- a/libraries/model/tcc/PortElements.tcc +++ /dev/null @@ -1,187 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: PortElements.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - // - // PortElements - // - - template - PortElements::PortElements(const OutputPort& port) : - PortElementsBase(port) - { - } - - template - PortElements::PortElements(const OutputPort& port, size_t startIndex) : - PortElementsBase(PortRange(port, startIndex)) - { - } - - template - PortElements::PortElements(const OutputPort& port, size_t startIndex, size_t numValues) : - PortElementsBase(PortRange(port, startIndex, numValues)) - { - } - - template - PortElements::PortElements(const PortElement& element) - { - AddRange(PortRange(*element.ReferencedPort(), element.GetIndex(), 1)); - } - - template - PortElements::PortElements(const std::vector>& elements) - { - for (const auto& element : elements) - { - AddRange({ element.ReferencedPort(), element.GetIndex() }); - } - } - - template - PortElements::PortElements(const std::initializer_list>& groups) - { - for (const auto& group : groups) - { - for (const auto& range : group.GetRanges()) - { - AddRange(range); - } - } - } - - template - PortElements::PortElements(const std::vector>& groups) - { - for (const auto& group : groups) - { - for (const auto& range : group.GetRanges()) - { - AddRange(range); - } - } - } - - template - PortElements::PortElements(const PortElements& elements, size_t index) : - PortElements(elements, index, 1) - { - } - - template - PortElements::PortElements(const PortElements& elements, size_t startIndex, size_t numValues) - { - if (startIndex + numValues > elements.Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Invalid slice."); - } - - auto rangeIterator = elements.GetRanges().begin(); - auto endIterator = elements.GetRanges().end(); - // skip ranges that come before the desired elements - while (rangeIterator != endIterator && rangeIterator->Size() <= startIndex) - { - startIndex -= rangeIterator->Size(); - ++rangeIterator; - } - - // now extract portions from ranges until done - while (rangeIterator != endIterator && numValues > 0) - { - size_t numRangeValues = std::min(rangeIterator->Size() - startIndex, numValues); - AddRange({ *rangeIterator->ReferencedPort(), startIndex, numRangeValues }); - numValues -= numRangeValues; - ++rangeIterator; - startIndex = 0; // 
after the first time through, we'll always take the first part of a range - } - ComputeSize(); - } - - template - PortElements::PortElements(const PortElementsBase& other) - { - for (const auto& range : other.GetRanges()) - { - if (range.GetPortType() != Port::GetPortType()) - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); - } - AddRange(range); - } - } - - template - PortElement PortElements::GetElement(size_t index) const - { - auto baseElement = PortElementsBase::GetElement(index); - auto element = static_cast&>(baseElement); - return element; - } - - template - void PortElements::Append(const PortElements& other) - { - PortElementsBase::Append(other); - } - - // - // Convenience functions - // - - // MakePortElements - template - PortElements MakePortElements(const OutputPort& port) - { - return PortElements(port); - } - - template - PortElements MakePortElements(const OutputPort& port, size_t startIndex) - { - return PortElements(port, startIndex); - } - - template - PortElements MakePortElements(const OutputPort& port, size_t startIndex, size_t numValues) - { - return PortElements(port, startIndex, numValues); - } - - // Concat - template - RefType Concat(const RefType& ref1, Refs&&... refs) - { - return RefType({ ref1, refs... }); - } - - // - // Proxy classes - // - template - PortElementsProxy PortElementsToProxy(const PortElements& elements) - { - PortElementsProxy proxy(elements.GetPortType()); - for (auto r : elements.GetRanges()) - { - proxy.Append(r); - } - return proxy; - } - - template - PortElements ProxyToPortElements(const Model& model, const PortElementsProxy& proxy) - { - return PortElements(ProxyToPortElements(model, proxy)); - } - -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/SliceNode.tcc b/libraries/model/tcc/SliceNode.tcc deleted file mode 100644 index cbb72027c..000000000 --- a/libraries/model/tcc/SliceNode.tcc +++ /dev/null @@ -1,96 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SliceNode.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - template - SliceNode::SliceNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0){}; - - template - SliceNode::SliceNode(const OutputPortBase& port, int start, int count) : - CompilableNode({ &_input }, { &_output }), - _input(this, static_cast&>(port), defaultInputPortName), - _output(this, defaultOutputPortName, port.GetMemoryLayout()), - _largestDimensionStart(start), - _largestDimensionCount(count) - { - auto layout = port.GetMemoryLayout(); - if (layout.HasPadding()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "SliceNode must not have padding on its input"); - } - - auto newShape = layout.GetActiveSize(); - newShape[0] = _largestDimensionCount; - _output.SetMemoryLayout({ newShape, layout.GetLogicalDimensionOrder() }); - } - - template - void SliceNode::Compute() const - { - auto input = _input.GetValue(); - auto output = std::vector(input.begin() + _largestDimensionStart, input.begin() + _largestDimensionStart + _largestDimensionCount); - _output.SetOutput(output); - } - - template - void SliceNode::Compile(IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - if 
(GetPortVariableType(_input) != GetPortVariableType(_output)) - { - throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "Input and output port types must match"); - } - - auto input = function.LocalArray(compiler.EnsurePortEmitted(_input)); - auto output = function.LocalArray(compiler.EnsurePortEmitted(_output)); - - auto layout = _input.GetReferencedPort().GetMemoryLayout(); - const auto increment = layout.GetCumulativeIncrement(0); // slowest-moving dimension - const auto inputOffset = static_cast(_largestDimensionStart * increment); - const auto rangeSize = _largestDimensionCount * increment; - function.For(rangeSize, [=](emitters::IRFunctionEmitter& function, emitters::IRLocalScalar i) { - output[i] = input[inputOffset + i]; - }); - } - - template - void SliceNode::Copy(ModelTransformer& transformer) const - { - const auto& newInputs = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newInputs, _largestDimensionStart, _largestDimensionCount); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void SliceNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["start"] << _largestDimensionStart; - archiver["count"] << _largestDimensionCount; - archiver["layout"] << _output.GetMemoryLayout(); - } - - template - void SliceNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver["start"] >> _largestDimensionStart; - archiver["count"] >> _largestDimensionCount; - PortMemoryLayout layout; - archiver["layout"] >> layout; - _output.SetMemoryLayout(layout); - } -} // namespace model -} // namespace ell diff --git a/libraries/model/tcc/SpliceNode.tcc b/libraries/model/tcc/SpliceNode.tcc deleted file mode 100644 index 90a1becbb..000000000 --- a/libraries/model/tcc/SpliceNode.tcc +++ /dev/null @@ -1,154 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SpliceNode.tcc (model) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace model -{ - template - SpliceNode::SpliceNode() : - CompilableNode({}, { &_output }), - _output(this, defaultOutputPortName, 0) - {} - - template - SpliceNode::SpliceNode(const std::vector& inputs) : - CompilableNode({}, { &_output }), - _output(this, defaultOutputPortName, ComputeOutputLayout(inputs)) - { - auto layout = _output.GetMemoryLayout(); - if (layout.HasPadding()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "SpliceNode must not have padding on its input"); - } - - // Add 1 input port per port in the input list - auto increment = layout.GetCumulativeIncrement(0); - int index = 0; - for (const auto& inputPort : inputs) - { - if (inputPort->Size() % increment != 0) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "SpliceNode input port size must be multiple of largest dimension increment"); - } - - // Create a new InputPort object - auto portName = std::string("input_") + std::to_string(index); - _inputPorts.emplace_back(std::make_unique>(this, static_cast&>(*inputPort), portName)); - - // And add it to this node - auto rawPtr = _inputPorts.back().get(); - AddInputPort(rawPtr); - ++index; - } - } - - 
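
SliceNode and SpliceNode above are duals over the same dense-layout assumption: because padded layouts are rejected up front, a slice of the slowest-moving dimension is a single contiguous range of count * increment elements, and a splice is such ranges appended in order. The slice half in plain C++, as an emitter-free analogue of the For loop in SliceNode::Compile above (names are mine, not ELL's):

#include <cassert>
#include <vector>

// increment plays the role of layout.GetCumulativeIncrement(0): the number of
// elements per step of the slowest-moving dimension in a dense layout.
std::vector<double> SliceSlowestDim(const std::vector<double>& input,
                                    int increment, int start, int count)
{
    const int offset = start * increment;    // inputOffset in the IR version
    const int rangeSize = count * increment; // loop bound in the IR version
    assert(offset + rangeSize <= static_cast<int>(input.size()));
    return { input.begin() + offset, input.begin() + offset + rangeSize };
}

// Example: for a 4x3 row-major buffer, increment == 3, so
// SliceSlowestDim(data, 3, 1, 2) copies rows 1 and 2 (elements 3..8).
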
template - PortMemoryLayout SpliceNode::ComputeOutputLayout(const std::vector& inputPorts) - { - std::vector ranges; - for (auto port : inputPorts) - { - ranges.emplace_back(*port); - } - PortElementsBase elements(ranges); - return elements.GetMemoryLayout(); - } - - template - void SpliceNode::Compute() const - { - std::vector output; - output.reserve(_output.Size()); - for (const auto& input : _inputPorts) - { - auto value = input->GetValue(); - std::copy(value.begin(), value.end(), std::back_inserter(output)); - } - _output.SetOutput(output); - } - - template - void SpliceNode::Compile(IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - llvm::Value* pOutput = compiler.EnsurePortEmitted(_output); - // check if the pOutput variable is null - function.If(ell::emitters::TypedComparison::notEquals, pOutput, function.NullPointer(pOutput->getType()->getPointerElementType()->getPointerTo()), [pOutput, &compiler, this](emitters::IRFunctionEmitter& function) { - if (_inputPorts.size() == 1 && _inputPorts[0]->Size() == 1) - { - llvm::Value* pVal = compiler.LoadPortElementVariable(_inputPorts[0]->GetInputElement(0)); - function.Store(pOutput, pVal); - } - else - { - int rangeStart = 0; - for (const auto& inputPort : _inputPorts) - { - const auto& referencedPort = inputPort->GetReferencedPort(); - auto input = function.LocalArray(compiler.EnsurePortEmitted(referencedPort)); - auto output = function.LocalArray(pOutput); - auto rangeSize = referencedPort.Size(); - - function.For(rangeSize, [=](emitters::IRFunctionEmitter& function, auto i) { - output[i + rangeStart] = input[i]; - }); - rangeStart += rangeSize; - } - } - }); - } - - template - void SpliceNode::Copy(ModelTransformer& transformer) const - { - std::vector newInputs; - for (const auto& inputPort : _inputPorts) - { - const auto& newPort = transformer.GetCorrespondingInputs(*inputPort); - newInputs.emplace_back(&newPort); - } - auto newNode = transformer.AddNode>(newInputs); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void SpliceNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - int numInputs = static_cast(_inputPorts.size()); - archiver["numInputs"] << numInputs; - for (int index = 0; index < numInputs; ++index) - { - archiver[std::string("input_") + std::to_string(index)] << *_inputPorts[index]; - } - } - - template - void SpliceNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - int numInputs = 0; - archiver["numInputs"] >> numInputs; - _inputPorts.clear(); - std::vector referencedPorts; - for (int index = 0; index < numInputs; ++index) - { - InputPort port; - auto portName = std::string("input_") + std::to_string(index); - archiver[portName] >> port; - const auto& referencedPort = port.GetReferencedPort(); - _inputPorts.emplace_back(std::make_unique>(this, referencedPort, portName)); - auto rawPtr = _inputPorts.back().get(); - AddInputPort(rawPtr); - referencedPorts.push_back(&(_inputPorts.back()->GetReferencedPort())); - } - - _output.SetMemoryLayout(ComputeOutputLayout(referencedPorts)); - } -} // namespace model -} // namespace ell diff --git a/libraries/model/test/include/CompilableNodesTest.h b/libraries/model/test/include/CompilableNodesTest.h index ec7531257..9b383f6fa 100644 --- a/libraries/model/test/include/CompilableNodesTest.h +++ b/libraries/model/test/include/CompilableNodesTest.h @@ -102,4 +102,38 @@ void TestSoftmaxLayerNode(size_t inputPadding = 0, size_t outputPadding = 0); void 
TestFusedLinearLayerNodes(size_t rows, size_t columns, size_t channels); void TestRegionDetectionNode(); -#include "../tcc/CompilableNodesTest.tcc" +#pragma region implementation + +template +void TestCompilableDotProductNode2(int dimension) +{ + model::Model model; + std::vector constValue(dimension); + for (int index = 0; index < dimension; ++index) + { + constValue[index] = index + 0.5; + } + auto inputNode = model.AddNode>(dimension); + auto constantNode = model.AddNode>(constValue); + auto dotNode = model.AddNode>(inputNode->output, constantNode->output); + auto map = model::Map(model, { { "input", inputNode } }, { { "output", dotNode->output } }); + model::IRMapCompiler compiler; + auto compiledMap = compiler.Compile(map); + PrintIR(compiledMap); + + // compare output + std::vector> signal; + for (int index1 = 0; index1 < 8; ++index1) + { + std::vector x; + for (int index2 = 0; index2 < dimension; ++index2) + { + x.push_back(index2); + } + signal.push_back(x); + } + + VerifyCompiledOutput(map, compiledMap, signal, "DotProductNode"); +} + +#pragma endregion implementation diff --git a/libraries/model/test/include/CompilerTest.h b/libraries/model/test/include/CompilerTest.h index fa9f4dd6e..137792f2e 100644 --- a/libraries/model/test/include/CompilerTest.h +++ b/libraries/model/test/include/CompilerTest.h @@ -56,4 +56,35 @@ void TestMultiOutputMap(); void TestMultiSourceSinkMap(); void TestCompiledMapMove(); -#include "../tcc/CompilerTest.tcc" +#pragma region implementation + +template +void TestLinearPredictor() +{ + std::vector> signal{ { 1.0, 2.0, 1.0, -1.0, 0.5 } }; + + const int dim = 5; + math::ColumnVector weights({ 1, 2, 3, 4, 5 }); + ElementType bias = 1.5f; + + predictors::LinearPredictor predictor(weights, bias); + + model::Model model; + auto inputNode = model.AddNode>(dim); + auto predictorNode = model.AddNode>(inputNode->output, predictor); + auto outputNode = model.AddNode>(predictorNode->output); + + auto map = model::Map(model, { { "input", inputNode } }, { { "output", outputNode->output } }); + + model::MapCompilerOptions settings; + settings.mapFunctionName = "TestLinear"; + model::IRMapCompiler compiler(settings); + auto compiledMap = compiler.Compile(map); + + testing::ProcessTest("Testing IsValid of LinearPredictor map", testing::IsEqual(compiledMap.IsValid(), true)); + + // compare output + VerifyCompiledOutput(map, compiledMap, signal, " map"); +} + +#pragma endregion implementation diff --git a/libraries/model/test/include/ModelMaker.h b/libraries/model/test/include/ModelMaker.h index f39fcf26b..c36a76bd3 100644 --- a/libraries/model/test/include/ModelMaker.h +++ b/libraries/model/test/include/ModelMaker.h @@ -86,4 +86,129 @@ class ModelMaker std::string _name; }; -#include "../tcc/ModelMaker.tcc" +#pragma region implementation + +using namespace ell; + +template +model::InputNode* ModelMaker::Inputs(size_t count) +{ + return _model.AddNode>(count); +} + +template +model::InputNode* ModelMaker::Inputs(std::vector& values) +{ + auto node = Inputs(values.size()); + node->SetInput(values); + return node; +} + +template +model::OutputNode* ModelMaker::Outputs(const model::OutputPort& x) +{ + return _model.AddNode>(x); +} + +template +nodes::BinaryOperationNode* ModelMaker::Add(const model::OutputPort& x, const model::OutputPort& y) +{ + return _model.AddNode>(x, y, emitters::BinaryOperationType::add); +} + +template +nodes::BinaryOperationNode* ModelMaker::Subtract(const model::OutputPort& x, const model::OutputPort& y) +{ + return _model.AddNode>(x, y, 
emitters::BinaryOperationType::subtract); +} + +template +nodes::BinaryOperationNode* ModelMaker::Multiply(const model::OutputPort& x, const model::OutputPort& y) +{ + return _model.AddNode>(x, y, emitters::BinaryOperationType::coordinatewiseMultiply); +} + +template +nodes::BinaryOperationNode* ModelMaker::Divide(const model::OutputPort& x, const model::OutputPort& y) +{ + return _model.AddNode>(x, y, emitters::BinaryOperationType::coordinatewiseDivide); +} + +template +nodes::DotProductNode* ModelMaker::DotProduct(const model::OutputPort& x, const model::OutputPort& y) +{ + return _model.AddNode>(x, y); +} + +template +nodes::BinaryPredicateNode* ModelMaker::Equals(const model::OutputPort& x, const model::OutputPort& y) +{ + return _model.AddNode>(x, y, emitters::BinaryPredicateType::equal); +} + +template +nodes::BinaryPredicateNode* ModelMaker::Lt(const model::OutputPort& x, const model::OutputPort& y) +{ + return _model.AddNode>(x, y, emitters::BinaryPredicateType::less); +} + +template +nodes::BinaryPredicateNode* ModelMaker::Gt(const model::OutputPort& x, const model::OutputPort& y) +{ + return _model.AddNode>(x, y, emitters::BinaryPredicateType::greater); +} + +template +nodes::MultiplexerNode* ModelMaker::Select(const model::OutputPort& elts, const model::OutputPort& selector) +{ + auto node = _model.AddNode>(elts, selector); + return node; +} + +template +nodes::UnaryOperationNode* ModelMaker::Sqrt(const model::OutputPort& x) +{ + return _model.AddNode>(x, emitters::UnaryOperationType::sqrt); +} + +template +nodes::SumNode* ModelMaker::Sum(const model::OutputPort& x) +{ + return _model.AddNode>(x); +} + +template +nodes::DelayNode* ModelMaker::Delay(const model::OutputPort& x, size_t windowSize) +{ + return _model.AddNode>(x, windowSize); +} + +template +nodes::AccumulatorNode* ModelMaker::Accumulate(const model::OutputPort& x) +{ + return _model.AddNode>(x); +} + +template +nodes::ConstantNode* ModelMaker::Constant(const T value) +{ + return _model.AddNode>(value); +} + +template +nodes::ConstantNode* ModelMaker::Constant(const std::vector& values) +{ + auto* pNode = _model.AddNode>(values); + // Work around a bug. 
Make sure literal values are propagated to outputs + _model.ComputeOutput(pNode->output); + return pNode; +} + +template +model::OutputPort* ModelMaker::GetOutputPort(model::Node* pNode, size_t portIndex) +{ + auto pPort = pNode->GetOutputPorts()[portIndex]; + return static_cast*>(pPort); +} + +#pragma endregion implementation diff --git a/libraries/model/test/tcc/CompilableNodesTest.tcc b/libraries/model/test/tcc/CompilableNodesTest.tcc deleted file mode 100644 index a736c3f2b..000000000 --- a/libraries/model/test/tcc/CompilableNodesTest.tcc +++ /dev/null @@ -1,39 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: CompilableNodesTest.cpp (compile_test) -// Authors: Umesh Madan, Chuck Jacobs, Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -void TestCompilableDotProductNode2(int dimension) -{ - model::Model model; - std::vector constValue(dimension); - for (int index = 0; index < dimension; ++index) - { - constValue[index] = index + 0.5; - } - auto inputNode = model.AddNode>(dimension); - auto constantNode = model.AddNode>(constValue); - auto dotNode = model.AddNode>(inputNode->output, constantNode->output); - auto map = model::Map(model, { { "input", inputNode } }, { { "output", dotNode->output } }); - model::IRMapCompiler compiler; - auto compiledMap = compiler.Compile(map); - PrintIR(compiledMap); - - // compare output - std::vector> signal; - for (int index1 = 0; index1 < 8; ++index1) - { - std::vector x; - for (int index2 = 0; index2 < dimension; ++index2) - { - x.push_back(index2); - } - signal.push_back(x); - } - - VerifyCompiledOutput(map, compiledMap, signal, "DotProductNode"); -} diff --git a/libraries/model/test/tcc/CompilerTest.tcc b/libraries/model/test/tcc/CompilerTest.tcc deleted file mode 100644 index 4ae4de598..000000000 --- a/libraries/model/test/tcc/CompilerTest.tcc +++ /dev/null @@ -1,36 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: CompilerTest.tcc (compile_test) -// Authors: Chuck Jacobs, Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -template -void TestLinearPredictor() -{ - std::vector> signal{ { 1.0, 2.0, 1.0, -1.0, 0.5 } }; - - const int dim = 5; - math::ColumnVector weights({ 1, 2, 3, 4, 5 }); - ElementType bias = 1.5f; - - predictors::LinearPredictor predictor(weights, bias); - - model::Model model; - auto inputNode = model.AddNode>(dim); - auto predictorNode = model.AddNode>(inputNode->output, predictor); - auto outputNode = model.AddNode>(predictorNode->output); - - auto map = model::Map(model, { { "input", inputNode } }, { { "output", outputNode->output } }); - - model::MapCompilerOptions settings; - settings.mapFunctionName = "TestLinear"; - model::IRMapCompiler compiler(settings); - auto compiledMap = compiler.Compile(map); - - testing::ProcessTest("Testing IsValid of LinearPredictor map", testing::IsEqual(compiledMap.IsValid(), true)); - - // compare output - VerifyCompiledOutput(map, compiledMap, signal, " map"); -} diff --git a/libraries/model/test/tcc/ModelMaker.tcc b/libraries/model/test/tcc/ModelMaker.tcc deleted file mode 100644 index 853dbabe7..000000000 --- a/libraries/model/test/tcc/ModelMaker.tcc +++ /dev/null @@ -1,130 +0,0 @@ 
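
The test .tcc deletions here (and ModelMaker.tcc, whose removed body follows) are the flip side of the header changes above: each header now carries its template definitions between #pragma region implementation and #pragma endregion implementation instead of a trailing #include of a .tcc file. The recipe in miniature, with a hypothetical Stack<T> rather than an ELL class:

// Stack.h -- schematic single-header template after the .tcc migration.
#pragma once

#include <vector>

template <typename T>
class Stack
{
public:
    void Push(T value);
    T Pop();

private:
    std::vector<T> _data;
};

// The region pragmas only fold the editor view; compilers that don't
// recognize them can ignore (or be told to ignore) the unknown pragma.
#pragma region implementation

template <typename T>
void Stack<T>::Push(T value)
{
    _data.push_back(value);
}

template <typename T>
T Stack<T>::Pop()
{
    T value = _data.back();
    _data.pop_back();
    return value;
}

#pragma endregion implementation
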
-//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ModelMaker.tcc (compile_test) -// Authors: Umesh Madan, Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -using namespace ell; - -template -model::InputNode* ModelMaker::Inputs(size_t count) -{ - return _model.AddNode>(count); -} - -template -model::InputNode* ModelMaker::Inputs(std::vector& values) -{ - auto node = Inputs(values.size()); - node->SetInput(values); - return node; -} - -template -model::OutputNode* ModelMaker::Outputs(const model::OutputPort& x) -{ - return _model.AddNode>(x); -} - -template -nodes::BinaryOperationNode* ModelMaker::Add(const model::OutputPort& x, const model::OutputPort& y) -{ - return _model.AddNode>(x, y, emitters::BinaryOperationType::add); -} - -template -nodes::BinaryOperationNode* ModelMaker::Subtract(const model::OutputPort& x, const model::OutputPort& y) -{ - return _model.AddNode>(x, y, emitters::BinaryOperationType::subtract); -} - -template -nodes::BinaryOperationNode* ModelMaker::Multiply(const model::OutputPort& x, const model::OutputPort& y) -{ - return _model.AddNode>(x, y, emitters::BinaryOperationType::coordinatewiseMultiply); -} - -template -nodes::BinaryOperationNode* ModelMaker::Divide(const model::OutputPort& x, const model::OutputPort& y) -{ - return _model.AddNode>(x, y, emitters::BinaryOperationType::coordinatewiseDivide); -} - -template -nodes::DotProductNode* ModelMaker::DotProduct(const model::OutputPort& x, const model::OutputPort& y) -{ - return _model.AddNode>(x, y); -} - -template -nodes::BinaryPredicateNode* ModelMaker::Equals(const model::OutputPort& x, const model::OutputPort& y) -{ - return _model.AddNode>(x, y, emitters::BinaryPredicateType::equal); -} - -template -nodes::BinaryPredicateNode* ModelMaker::Lt(const model::OutputPort& x, const model::OutputPort& y) -{ - return _model.AddNode>(x, y, emitters::BinaryPredicateType::less); -} - -template -nodes::BinaryPredicateNode* ModelMaker::Gt(const model::OutputPort& x, const model::OutputPort& y) -{ - return _model.AddNode>(x, y, emitters::BinaryPredicateType::greater); -} - -template -nodes::MultiplexerNode* ModelMaker::Select(const model::OutputPort& elts, const model::OutputPort& selector) -{ - auto node = _model.AddNode>(elts, selector); - return node; -} - -template -nodes::UnaryOperationNode* ModelMaker::Sqrt(const model::OutputPort& x) -{ - return _model.AddNode>(x, emitters::UnaryOperationType::sqrt); -} - -template -nodes::SumNode* ModelMaker::Sum(const model::OutputPort& x) -{ - return _model.AddNode>(x); -} - -template -nodes::DelayNode* ModelMaker::Delay(const model::OutputPort& x, size_t windowSize) -{ - return _model.AddNode>(x, windowSize); -} - -template -nodes::AccumulatorNode* ModelMaker::Accumulate(const model::OutputPort& x) -{ - return _model.AddNode>(x); -} - -template -nodes::ConstantNode* ModelMaker::Constant(const T value) -{ - return _model.AddNode>(value); -} - -template -nodes::ConstantNode* ModelMaker::Constant(const std::vector& values) -{ - auto* pNode = _model.AddNode>(values); - // Work around a bug. 
Make sure literal values are propagated to outputs - _model.ComputeOutput(pNode->output); - return pNode; -} - -template -model::OutputPort* ModelMaker::GetOutputPort(model::Node* pNode, size_t portIndex) -{ - auto pPort = pNode->GetOutputPorts()[portIndex]; - return static_cast*>(pPort); -} diff --git a/libraries/model_testing/CMakeLists.txt b/libraries/model_testing/CMakeLists.txt index d5e66e0e2..296d84141 100644 --- a/libraries/model_testing/CMakeLists.txt +++ b/libraries/model_testing/CMakeLists.txt @@ -10,14 +10,11 @@ set(src set(include include/ModelTestUtilities.h ) -set(tcc - tcc/ModelTestUtilities.tcc ) source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) -add_library(${library_name} ${src} ${include} ${tcc}) +add_library(${library_name} ${src} ${include}) target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR}) target_link_libraries(${library_name} utilities model nodes emitters testing) diff --git a/libraries/model_testing/include/ModelTestUtilities.h b/libraries/model_testing/include/ModelTestUtilities.h index 35f39c603..5ebc4e0a0 100644 --- a/libraries/model_testing/include/ModelTestUtilities.h +++ b/libraries/model_testing/include/ModelTestUtilities.h @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -139,4 +140,337 @@ void FillWeightsTensor(ell::math::ChannelColumnRowTensor& tensor, E template void FillMatrix(math::RowMatrix& matrix, ElementType startValue = 0, ElementType step = 1); -#include "../tcc/ModelTestUtilities.tcc" +#pragma region implementation + +template +DebugNode::DebugNode() : + model::Node({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0) +{ +} + +template +DebugNode::DebugNode(const model::OutputPort& input, InfoType debugInfo) : + model::Node({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, _input.Size()), + _info(debugInfo) +{ +} + +template +void DebugNode::Copy(model::ModelTransformer& transformer) const +{ + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, _info); + transformer.MapNodeOutput(output, newNode->output); +} + +template +void DebugNode::Compute() const +{ + _output.SetOutput(_input.GetValue()); +} + +template +void DebugNode::WriteToArchive(utilities::Archiver& archiver) const +{ + // nothing +} + +template +void DebugNode::ReadFromArchive(utilities::Unarchiver& archiver) +{ + // nothing +} + +// +// +// +template +ValueType LargestDifference(const std::vector& a, const std::vector& b) +{ + ValueType largestDifference = 0; + auto size = a.size(); + for (size_t index = 0; index < size; ++index) + { + auto difference = a[index] - b[index]; + if (std::fabs(difference) > std::fabs(largestDifference)) + { + largestDifference = difference; + } + } + return largestDifference; +} + +template +bool IsEqual(const ValueType& a, const ValueType& b, double epsilon = 1e-6) +{ + return testing::IsEqual(a, b); +} + +template <> +inline bool IsEqual(const float& a, const float& b, double epsilon) +{ + return testing::IsEqual(a, b, static_cast(epsilon)); +} + +template <> +inline bool IsEqual(const double& a, const double& b, double epsilon) +{ + return testing::IsEqual(a, b, epsilon); +} + +template <> +inline bool IsEqual(const std::vector& a, const std::vector& b, double epsilon) +{ + return testing::IsEqual(a, b, static_cast(epsilon)); +} + 
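
The IsEqual family here uses full function-template specializations so each element type reaches the testing::IsEqual overload with an epsilon of the right type (the vector<double> case continues below). The same shape in miniature, with a hypothetical ApproxEqual that is not part of ELL:

#include <cmath>

// Primary template: exact comparison; epsilon is unused here.
template <typename T>
bool ApproxEqual(const T& a, const T& b, double /*epsilon*/ = 1e-6)
{
    return a == b;
}

// Full specializations route floating-point types to a tolerance check,
// narrowing epsilon where needed, just as the float case above does.
template <>
inline bool ApproxEqual<float>(const float& a, const float& b, double epsilon)
{
    return std::fabs(a - b) <= static_cast<float>(epsilon);
}

template <>
inline bool ApproxEqual<double>(const double& a, const double& b, double epsilon)
{
    return std::fabs(a - b) <= epsilon;
}
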
+template <> +inline bool IsEqual(const std::vector& a, const std::vector& b, double epsilon) +{ + return testing::IsEqual(a, b, epsilon); +} + +template +std::ostream& operator<<(std::ostream& out, const std::vector& v) +{ + out << "["; + for (size_t index = 0; index < v.size(); ++index) + { + if (index != 0) + out << ", "; + out << v[index]; + } + out << "]"; + return out; +} + +template +void PrintCompiledOutput(const model::Map& map, const model::IRCompiledMap& compiledMap, const std::vector>& signal, const std::string& name) +{ + if (!IsVerbose()) + { + return; + } + + // compare output + for (const auto& input : signal) + { + auto computedResult = map.Compute(input); + auto compiledResult = compiledMap.Compute(input); + std::cout << computedResult << " \t" << compiledResult << std::endl; + } +} + +template +void PrintCompiledOutput(const model::Map& map, const model::IRCompiledMap& compiledMap, const std::vector>& signal, const std::string& name) +{ + switch (map.GetOutput(0).GetPortType()) + { + case model::Port::PortType::boolean: + PrintCompiledOutput(map, compiledMap, signal, name); + break; + case model::Port::PortType::integer: + PrintCompiledOutput(map, compiledMap, signal, name); + break; + case model::Port::PortType::bigInt: + PrintCompiledOutput(map, compiledMap, signal, name); + break; + case model::Port::PortType::smallReal: + PrintCompiledOutput(map, compiledMap, signal, name); + break; + case model::Port::PortType::real: + PrintCompiledOutput(map, compiledMap, signal, name); + break; + default: + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } +} + +template +void VerifyMapOutput(const model::Map& map, std::vector>& signal, std::vector>& expectedOutput, const std::string& name) +{ + bool ok = true; + // compare output + for (size_t index = 0; index < signal.size(); ++index) + { + auto&& input = signal[index]; + auto&& output = expectedOutput[index]; + map.SetInputValue(0, input); + auto computedResult = map.ComputeOutput(0); + + ok = ok && IsEqual(output, computedResult); + + if (IsVerbose()) + { + std::cout << computedResult << " \t" << output << std::endl; + } + } + testing::ProcessTest(std::string("Testing map " + name + " compute"), ok); +} + +template +std::vector VerifyCompiledOutput(const model::Map& map, const model::IRCompiledMap& compiledMap, const std::vector>& signal, const std::string& name, double epsilon) +{ + bool ok = true; + std::vector computedResult; + // compare output + for (const auto& input : signal) + { + map.SetInputValue(0, input); + computedResult = map.ComputeOutput(0); + + compiledMap.SetInputValue(0, input); + auto compiledResult = compiledMap.ComputeOutput(0); + ok = ok && IsEqual(computedResult, compiledResult, static_cast(epsilon)); + + if (IsVerbose() || !ok) + { + std::cout << "input: " << input << std::endl; + std::cout << "computed: " << computedResult << std::endl; + std::cout << "compiled: " << compiledResult << std::endl; + std::cout << "Largest difference: " << LargestDifference(computedResult, compiledResult) << ", epsilon: " << epsilon << std::endl; + } + } + testing::ProcessTest(std::string("Testing compiled " + name + " compute"), ok); + return computedResult; +} + +template +void VerifyCompiledOutput(const model::Map& map, const model::IRCompiledMap& compiledMap, const std::vector>& signal, const std::string& name, double epsilon) +{ + switch (map.GetOutput(0).GetPortType()) + { + case model::Port::PortType::boolean: + VerifyCompiledOutput(map, compiledMap, signal, name, epsilon); + 
+
+template <typename ValueType>
+class Uniform
+{
+public:
+    Uniform(ValueType minVal, ValueType maxVal, std::string seed = "123") :
+        _rng(utilities::GetRandomEngine(seed)),
+        _range(static_cast<double>(_rng.max() - _rng.min())),
+        _minOutput(minVal),
+        _outputRange(maxVal - minVal) {}
+
+    ValueType operator()()
+    {
+        double uniform = static_cast<double>(_rng()) / _range;
+        return static_cast<ValueType>((uniform * _outputRange) + _minOutput);
+    }
+
+private:
+    std::default_random_engine _rng;
+    double _range;
+    ValueType _minOutput;
+    ValueType _outputRange;
+};
+
+template <typename ElementType>
+void FillRandomVector(std::vector<ElementType>& vector, ElementType min, ElementType max)
+{
+    Uniform<ElementType> rand(min, max);
+    std::generate(vector.begin(), vector.end(), rand);
+}
+
+template <typename ElementType>
+std::vector<ElementType> GetRandomVector(size_t size, ElementType min, ElementType max)
+{
+    std::vector<ElementType> result(size);
+    FillRandomVector(result, min, max);
+    return result;
+}
+
+template <typename ElementType>
+void FillRandomVector(ell::math::ColumnVector<ElementType>& vector, ElementType min, ElementType max)
+{
+    Uniform<ElementType> rand(min, max);
+    vector.Generate(rand);
+}
+
+template <typename ElementType>
+void FillRandomTensor(ell::math::ChannelColumnRowTensor<ElementType>& tensor, ElementType min, ElementType max)
+{
+    Uniform<ElementType> rand(min, max);
+    tensor.Generate(rand);
+}
+
+template <typename ElementType>
+void FillVector(std::vector<ElementType>& vector, ElementType startValue, ElementType step)
+{
+    ElementType val = startValue;
+    std::generate(vector.begin(), vector.end(), [&val, step]() {
+        auto result = val;
+        val += step;
+        return result; });
+}
+
+template <typename ElementType>
+void FillVector(ell::math::ColumnVector<ElementType>& vector, ElementType startValue, ElementType step)
+{
+    ElementType val = startValue;
+    vector.Generate([&val, step]() {
+        auto result = val;
+        val += step;
+        return result; });
+}
+
+template <typename ElementType>
+void FillTensor(ell::math::ChannelColumnRowTensor<ElementType>& tensor, ElementType startValue, ElementType step)
+{
+    ElementType val = startValue;
+    tensor.Generate([&val, step]() {
+        auto result = val;
+        val += step;
+        return result; });
+}
+
+template <typename ElementType, math::Dimension dimension0, math::Dimension dimension1, math::Dimension dimension2>
+void FillTensor(math::TensorReference<ElementType, dimension0, dimension1, dimension2>& tensor, ElementType startValue, ElementType step)
+{
+    ElementType val = startValue;
+    tensor.Generate([&val, step]() {
+        auto result = val;
+        val += step;
+        return result; });
+}
+
+template <typename ElementType>
+void FillWeightsTensor(ell::math::ChannelColumnRowTensor<ElementType>& tensor, ElementType startValue, ElementType step)
+{
+    ElementType val = startValue;
+    tensor.Generate([&val, step]() {
+        auto result = val;
+        val += step;
+        return result; });
+}
+
+template <typename ElementType>
+void FillMatrix(math::RowMatrix<ElementType>& matrix, ElementType startValue, ElementType step)
+{
+    ElementType val = startValue;
+    matrix.Generate([&val, step]() {
+        auto result = val;
+        val += step;
+        return result; });
+}
+
+#pragma endregion implementation
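Uniform normalizes a raw engine draw against the engine's full range, then scales and shifts it into [minVal, maxVal]. The same arithmetic as a free function, assuming a plain std::default_random_engine rather than utilities::GetRandomEngine (whose definition is outside this diff); UniformDraw is an illustrative name.

// Self-contained sketch of the range-mapping arithmetic used by Uniform.
#include <iostream>
#include <random>

template <typename ValueType>
ValueType UniformDraw(std::default_random_engine& rng, ValueType minVal, ValueType maxVal)
{
    // normalize the raw draw to [0, 1], then scale and shift into [minVal, maxVal]
    double range = static_cast<double>(rng.max() - rng.min());
    double uniform = static_cast<double>(rng() - rng.min()) / range;
    return static_cast<ValueType>(uniform * (maxVal - minVal) + minVal);
}

int main()
{
    std::default_random_engine rng(123);
    for (int i = 0; i < 3; ++i)
    {
        std::cout << UniformDraw<float>(rng, -1.0f, 1.0f) << "\n";
    }
}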
diff --git a/libraries/model_testing/tcc/ModelTestUtilities.tcc b/libraries/model_testing/tcc/ModelTestUtilities.tcc
deleted file mode 100644
index 0826d9926..000000000
diff --git a/libraries/nodes/CMakeLists.txt b/libraries/nodes/CMakeLists.txt
index 273bd0ce2..9e31e86ff 100644
--- a/libraries/nodes/CMakeLists.txt
+++ b/libraries/nodes/CMakeLists.txt
@@ -103,48 +103,10 @@ set(include
     include/WinogradConvolutionNode.h
 )
 
-set (tcc
-    tcc/AccumulatorNode.tcc
-    tcc/BinaryFunctionNode.tcc
-    tcc/BinaryOperationNode.tcc
-    tcc/BinaryPredicateNode.tcc
-    tcc/BroadcastFunctionNode.tcc
-    tcc/BufferNode.tcc
-    tcc/ConcatenationNode.tcc
-    tcc/ConstantNode.tcc
-    tcc/DTWDistanceNode.tcc
-    tcc/DebugSinkNode.tcc
-    tcc/DelayNode.tcc
-    tcc/DemultiplexerNode.tcc
-    tcc/DotProductNode.tcc
-    tcc/ExtremalValueNode.tcc
-    tcc/ForestPredictorNode.tcc
-    tcc/HammingWindowNode.tcc
-    tcc/L2NormSquaredNode.tcc
-    tcc/LinearPredictorNode.tcc
-    tcc/MatrixVectorProductNode.tcc
-    tcc/MovingAverageNode.tcc
-    tcc/MovingVarianceNode.tcc
-    tcc/MultiplexerNode.tcc
-    tcc/NeuralNetworkLayerNode.tcc
-    tcc/NeuralNetworkPredictorNode.tcc
-    tcc/ReceptiveFieldMatrixNode.tcc
-    tcc/ReorderDataNode.tcc
-    tcc/SinkNode.tcc
-    tcc/SourceNode.tcc
-    tcc/SquaredEuclideanDistanceNode.tcc
-    tcc/SumNode.tcc
-    tcc/TypeCastNode.tcc
-    tcc/UnaryOperationNode.tcc
-    tcc/VoiceActivityDetectorNode.tcc
-    tcc/ValueSelectorNode.tcc
-)
-
 source_group("src" FILES ${src})
 source_group("include" FILES ${include})
-source_group("tcc" FILES ${tcc})
 
-add_library(${library_name} ${src} ${include} ${tcc})
+add_library(${library_name} ${src} ${include})
 
 target_include_directories(${library_name}
     PRIVATE
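Each header touched below follows the same recipe: the declaration block is left alone, the body of the former .tcc file is appended under a #pragma region implementation marker, and the trailing #include "../tcc/..." is deleted. In schematic form, with a hypothetical Foo class standing in for any of the classes in this change:

// Foo.h: declaration and inline template implementation in one header (illustrative sketch)
#pragma once

namespace ell
{
namespace nodes
{
    template <typename ValueType>
    class Foo
    {
    public:
        ValueType Twice(ValueType x) const;
    };
} // namespace nodes
} // namespace ell

#pragma region implementation

namespace ell
{
namespace nodes
{
    template <typename ValueType>
    ValueType Foo<ValueType>::Twice(ValueType x) const
    {
        return x + x; // template definitions must be visible to every translation unit
    }
} // namespace nodes
} // namespace ell

#pragma endregion implementation

Note that #pragma region/endregion only affects editor folding; compilers that do not recognize it simply ignore it, possibly after an unknown-pragma warning.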
diff --git a/libraries/nodes/include/AccumulatorNode.h b/libraries/nodes/include/AccumulatorNode.h
index faf99acf3..5efa810d9 100644
--- a/libraries/nodes/include/AccumulatorNode.h
+++ b/libraries/nodes/include/AccumulatorNode.h
@@ -81,4 +81,112 @@ namespace nodes
 } // namespace nodes
 } // namespace ell
 
-#include "../tcc/AccumulatorNode.tcc"
\ No newline at end of file
+#pragma region implementation
+
+namespace ell
+{
+namespace nodes
+{
+    template <typename ValueType>
+    AccumulatorNode<ValueType>::AccumulatorNode() :
+        CompilableNode({ &_input }, { &_output }),
+        _input(this, {}, defaultInputPortName),
+        _output(this, defaultOutputPortName, 0)
+    {
+    }
+
+    template <typename ValueType>
+    AccumulatorNode<ValueType>::AccumulatorNode(const model::OutputPort<ValueType>& input) :
+        CompilableNode({ &_input }, { &_output }),
+        _input(this, input, defaultInputPortName),
+        _output(this, defaultOutputPortName, _input.Size())
+    {
+        auto dimension = input.Size();
+        _accumulator = std::vector<ValueType>(dimension);
+    }
+
+    template <typename ValueType>
+    void AccumulatorNode<ValueType>::Compute() const
+    {
+        for (size_t index = 0; index < _input.Size(); ++index)
+        {
+            _accumulator[index] += _input[index];
+        }
+        _output.SetOutput(_accumulator);
+    }
+
+    template <typename ValueType>
+    void AccumulatorNode<ValueType>::Copy(model::ModelTransformer& transformer) const
+    {
+        const auto& newPortElements = transformer.GetCorrespondingInputs(_input);
+        auto newNode = transformer.AddNode<AccumulatorNode<ValueType>>(newPortElements);
+        transformer.MapNodeOutput(output, newNode->output);
+    }
+
+    template <typename ValueType>
+    void AccumulatorNode<ValueType>::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
+    {
+        static_assert(!std::is_same<ValueType, bool>(), "Cannot instantiate boolean accumulator nodes");
+        assert(GetPortVariableType(input) == GetPortVariableType(output));
+
+        // Allocate a global variable to accumulate the input
+        emitters::Variable* pAccumulatorVar = function.GetModule().Variables().AddVariable<emitters::InitializedVectorVariable<ValueType>>(emitters::VariableScope::global, output.Size());
+        emitters::LLVMValue accumulator = function.GetModule().EnsureEmitted(*pAccumulatorVar);
+
+        if (!compiler.GetCompilerOptions().unrollLoops)
+        {
+            CompileLoop(compiler, function, accumulator);
+        }
+        else
+        {
+            CompileExpanded(compiler, function, accumulator);
+        }
+    }
+
+    template <typename ValueType>
+    void AccumulatorNode<ValueType>::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, emitters::LLVMValue accumulator)
+    {
+        emitters::LLVMValue inputVector = compiler.EnsurePortEmitted(input);
+        emitters::LLVMValue result = compiler.EnsurePortEmitted(output);
+
+        function.VectorOperator(emitters::GetAddForValueType<ValueType>(), output.Size(), accumulator, inputVector, [&accumulator, &result, &function](emitters::LLVMValue i, emitters::LLVMValue value) {
+            function.SetValueAt(accumulator, i, value);
+            function.SetValueAt(result, i, value);
+        });
+    }
+
+    template <typename ValueType>
+    void AccumulatorNode<ValueType>::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, emitters::LLVMValue accumulator)
+    {
+        emitters::LLVMValue result = compiler.EnsurePortEmitted(output);
+        for (size_t index = 0; index < output.Size(); ++index)
+        {
+            emitters::LLVMValue inputValue = compiler.LoadPortElementVariable(input.GetInputElement(index));
+            emitters::LLVMValue accumValue = function.ValueAt(accumulator, function.Literal((int)index));
+            emitters::LLVMValue sum = function.Operator(emitters::GetAddForValueType<ValueType>(), inputValue, accumValue);
+            function.SetValueAt(accumulator, function.Literal((int)index), sum);
+            function.SetValueAt(result, function.Literal((int)index), sum);
+        }
+    }
+
+    template <typename ValueType>
+    void AccumulatorNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        Node::WriteToArchive(archiver);
+        archiver[defaultInputPortName] << _input;
+    }
+
+    template <typename ValueType>
+    void AccumulatorNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        Node::ReadFromArchive(archiver);
+        archiver[defaultInputPortName] >> _input;
+
+        auto dimension = _input.Size();
+        _accumulator = std::vector<ValueType>(dimension);
+        _output.SetSize(dimension);
+    }
+} // namespace nodes
+} // namespace ell
+
+#pragma endregion implementation
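Stripped of the model and IR-emission plumbing, AccumulatorNode's runtime behavior is an elementwise running sum that persists across calls. A self-contained sketch of just that state (RunningAccumulator is illustrative, not an ELL class):

// Minimal sketch of the accumulator semantics behind AccumulatorNode::Compute.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

template <typename ValueType>
class RunningAccumulator
{
public:
    explicit RunningAccumulator(size_t size) : _accumulator(size, ValueType{ 0 }) {}

    // add the input elementwise and return the running sum
    const std::vector<ValueType>& Accumulate(const std::vector<ValueType>& input)
    {
        assert(input.size() == _accumulator.size());
        for (size_t index = 0; index < input.size(); ++index)
        {
            _accumulator[index] += input[index];
        }
        return _accumulator;
    }

private:
    std::vector<ValueType> _accumulator;
};

int main()
{
    RunningAccumulator<double> acc(2);
    acc.Accumulate({ 1.0, 2.0 });
    auto& sums = acc.Accumulate({ 3.0, 4.0 });
    std::cout << sums[0] << ", " << sums[1] << "\n"; // prints 4, 6
}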
diff --git a/libraries/nodes/include/BatchNormalizationLayerNode.h b/libraries/nodes/include/BatchNormalizationLayerNode.h
index 992e96992..1efb9b5ea 100644
--- a/libraries/nodes/include/BatchNormalizationLayerNode.h
+++ b/libraries/nodes/include/BatchNormalizationLayerNode.h
@@ -8,9 +8,9 @@
 
 #pragma once
 
+#include "NeuralNetworkLayerNode.h"
 #include
 #include
-#include "NeuralNetworkLayerNode.h"
 #include
 #include
diff --git a/libraries/nodes/include/BinaryFunctionNode.h b/libraries/nodes/include/BinaryFunctionNode.h
index 240d285a1..286349735 100644
--- a/libraries/nodes/include/BinaryFunctionNode.h
+++ b/libraries/nodes/include/BinaryFunctionNode.h
@@ -138,4 +138,252 @@ namespace nodes
 } // namespace nodes
 } // namespace ell
 
-#include "../tcc/BinaryFunctionNode.tcc"
\ No newline at end of file
+#pragma region implementation
+
+namespace ell
+{
+namespace nodes
+{
+    template <typename ValueType, typename FunctionType>
+    BinaryFunctionNode<ValueType, FunctionType>::BinaryFunctionNode() :
+        CompilableNode({ &_input1, &_input2 }, { &_output }),
+        _input1(this, {}, defaultInput1PortName),
+        _input2(this, {}, defaultInput2PortName),
+        _output(this, defaultOutputPortName, 0),
+        _paddingValue(0)
+    {
+    }
+
+    template <typename ValueType, typename FunctionType>
+    BinaryFunctionNode<ValueType, FunctionType>::BinaryFunctionNode(const model::OutputPort<ValueType>& input1, const model::OutputPort<ValueType>& input2, FunctionType function, ValueType padding) :
+        BinaryFunctionNode(input1, input2, input1.GetMemoryLayout(), function, padding)
+    {
+    }
+
+    template <typename ValueType, typename FunctionType>
+    BinaryFunctionNode<ValueType, FunctionType>::BinaryFunctionNode(const model::OutputPort<ValueType>& input1, const model::OutputPort<ValueType>& input2, const model::PortMemoryLayout& layout, FunctionType function, ValueType padding) :
+        BinaryFunctionNode(input1, input2, layout, layout, function, padding)
+    {
+    }
+
+    template <typename ValueType, typename FunctionType>
+    BinaryFunctionNode<ValueType, FunctionType>::BinaryFunctionNode(const model::OutputPort<ValueType>& input1, const model::OutputPort<ValueType>& input2, const model::PortMemoryLayout& inputLayout, const model::PortMemoryLayout& outputLayout, FunctionType function, ValueType padding) :
+        CompilableNode({ &_input1, &_input2 }, { &_output }),
+        _input1(this, input1, defaultInput1PortName),
+        _input2(this, input2, defaultInput2PortName),
+        _inputLayout(inputLayout),
+        _output(this, defaultOutputPortName, outputLayout),
+        _function(std::move(function)),
+        _paddingValue(padding)
+    {
+        if (input1.Size() != input2.Size())
+        {
+            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Input sizes must match");
+        }
+
+        if (inputLayout.GetActiveSize() != outputLayout.GetActiveSize())
+        {
+            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument,
+                                            ell::utilities::FormatString("Input active area size %d doesn't match output active area size %d on BinaryFunctionNode %s",
+                                                                         inputLayout.GetActiveSize().NumElements(),
+                                                                         outputLayout.GetActiveSize().NumElements(),
+                                                                         GetId().ToString().c_str()));
+        }
+    }
+
+    template <typename ValueType, typename FunctionType>
+    void BinaryFunctionNode<ValueType, FunctionType>::Compute() const
+    {
+        auto outputLayout = _output.GetMemoryLayout();
+        auto outputSize = outputLayout.GetExtent().NumElements();
+        auto output = std::vector<ValueType>(outputSize);
+
+        const size_t prevInputOffset = 0;
+        const size_t prevOutputOffset = 0;
+        ComputeDimensionLoop(0, output, prevInputOffset, prevOutputOffset);
+
+        _output.SetOutput(output);
+    }
+
+    //
+    // Arbitrary-depth nested loops are generated recursively.
The ComputeDimensionLoop + // function emits `numDimensions` nested loops of the form: + // + // for(iz = 0; iz < sz; ++iz) + // { + // zOffset = (iz+offset[2]) * stride[2]; + // for(iy = 0; iy < sy; ++iy) + // { + // yOffset = zOffset + (iy+offset[1]) * stride[1]; + // for(ix = 0; ix < sx; ++ix) + // { + // offset = yOffset + (ix+offset[0]) * stride[0]; + // x = arr[offset]; + // val = f(x); + // output[offset] = val; + // } + // } + // } + // + + template + void BinaryFunctionNode::ComputeDimensionLoop(size_t dimension, + std::vector& output, + size_t prevInputDimensionOffset, + size_t prevOutputDimensionOffset) const + { + auto outputLayout = _output.GetMemoryLayout(); + const auto numDimensions = _inputLayout.NumDimensions(); + auto&& inputStride = _inputLayout.GetExtent(); + auto&& inputOffset = _inputLayout.GetOffset(); + auto&& inputSize = _inputLayout.GetActiveSize(); + auto&& outputOffset = outputLayout.GetOffset(); + auto&& outputStride = outputLayout.GetExtent(); + + for (int loopIndex = 0; loopIndex < inputSize[dimension]; ++loopIndex) + { + // offset within start of this dimension = (loopIndex + offset[dimension]) + auto thisInputDimensionInternalOffset = loopIndex + inputOffset[dimension]; + auto thisOutputDimensionInternalOffset = loopIndex + outputOffset[dimension]; + + size_t thisInputDimensionOffset = thisInputDimensionInternalOffset; + size_t thisOutputDimensionOffset = thisOutputDimensionInternalOffset; + if (dimension != 0) + { + thisInputDimensionOffset += prevInputDimensionOffset * inputStride[dimension]; + thisOutputDimensionOffset += prevOutputDimensionOffset * outputStride[dimension]; + } + + if (static_cast(dimension) < numDimensions - 1) + { + // Recursive call to emit nested loop + ComputeDimensionLoop(dimension + 1, output, thisInputDimensionOffset, thisOutputDimensionOffset); + } + else + { + // We're in the innermost loop --- compute the value + auto value1 = _input1[thisInputDimensionOffset]; + auto value2 = _input2[thisInputDimensionOffset]; + auto outputValue = _function.Compute(value1, value2); + output[thisOutputDimensionOffset] = outputValue; + } + } + } + + template + void BinaryFunctionNode::Compile(model::IRMapCompiler& compiler, + emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pInput1 = compiler.EnsurePortEmitted(input1); + emitters::LLVMValue pInput2 = compiler.EnsurePortEmitted(input2); + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output, _paddingValue); + + // Call recursive function to emit nested loops + emitters::LLVMValue prevInputDimensionOffset = nullptr; + emitters::LLVMValue prevOutputDimensionOffset = nullptr; + EmitComputeDimensionLoop(compiler, function, 0, pInput1, pInput2, pResult, prevInputDimensionOffset, prevOutputDimensionOffset); + } + + template + void BinaryFunctionNode::EmitComputeDimensionLoop(model::IRMapCompiler& compiler, + emitters::IRFunctionEmitter& function, + size_t dimension, + emitters::LLVMValue input1, + emitters::LLVMValue input2, + emitters::LLVMValue output, + emitters::LLVMValue prevInputDimensionOffset, + emitters::LLVMValue prevOutputDimensionOffset) const + { + auto outputLayout = _output.GetMemoryLayout(); + const auto numDimensions = _inputLayout.NumDimensions(); + auto&& inputStride = _inputLayout.GetExtent(); + auto&& inputOffset = _inputLayout.GetOffset(); + auto&& inputSize = _inputLayout.GetActiveSize(); + auto&& outputStride = outputLayout.GetExtent(); + auto&& outputOffset = outputLayout.GetOffset(); + + function.For(inputSize[dimension], [dimension, numDimensions, 
inputOffset, inputStride, outputOffset, outputStride, prevInputDimensionOffset, prevOutputDimensionOffset, input1, input2, output, &compiler, this](emitters::IRFunctionEmitter& function, emitters::LLVMValue loopIndex) { + // Calculate the offset within this dimension = (loopIndex + offset[dimension]) + emitters::LLVMValue thisInputDimensionInternalOffset = function.Operator(emitters::GetAddForValueType(), loopIndex, function.Literal(inputOffset[dimension])); + emitters::LLVMValue thisOutputDimensionInternalOffset = function.Operator(emitters::GetAddForValueType(), loopIndex, function.Literal(outputOffset[dimension])); + + // Calculate the total offset from beginning of memory: + // * if in the outermost loop, the offset into this dimension + // * otherwise, the offset into this dimension plus the previous offset scaled by the previous dimension's stride + emitters::LLVMValue thisInputDimensionOffset = nullptr; + emitters::LLVMValue thisOutputDimensionOffset = nullptr; + if (dimension == 0) + { + assert(prevInputDimensionOffset == nullptr); + assert(prevOutputDimensionOffset == nullptr); + thisInputDimensionOffset = thisInputDimensionInternalOffset; + thisOutputDimensionOffset = thisOutputDimensionInternalOffset; + } + else + { + auto scaledInputDimensionOffset = function.Operator(emitters::GetMultiplyForValueType(), prevInputDimensionOffset, function.Literal(inputStride[dimension])); + thisInputDimensionOffset = function.Operator(emitters::GetAddForValueType(), scaledInputDimensionOffset, thisInputDimensionInternalOffset); + + auto scaledOutputDimensionOffset = function.Operator(emitters::GetMultiplyForValueType(), prevOutputDimensionOffset, function.Literal(outputStride[dimension])); + thisOutputDimensionOffset = function.Operator(emitters::GetAddForValueType(), scaledOutputDimensionOffset, thisOutputDimensionInternalOffset); + } + + if (static_cast(dimension) < numDimensions - 1) + { + // Recursive call to emit nested loop + EmitComputeDimensionLoop(compiler, function, dimension + 1, input1, input2, output, thisInputDimensionOffset, thisOutputDimensionOffset); + } + else + { + // We're in the innermost loop --- compute the value + auto value1 = function.ValueAt(input1, thisInputDimensionOffset); + auto value2 = function.ValueAt(input2, thisInputDimensionOffset); + auto outputValue = _function.Compile(function, value1, value2); + function.SetValueAt(output, thisOutputDimensionOffset, outputValue); + } + }); + } + + template + void BinaryFunctionNode::Copy(model::ModelTransformer& transformer) const + { + auto outputLayout = _output.GetMemoryLayout(); + const auto& portElements1 = transformer.GetCorrespondingInputs(_input1); + const auto& portElements2 = transformer.GetCorrespondingInputs(_input2); + auto newNode = transformer.AddNode>(portElements1, portElements2, _inputLayout, outputLayout, _function, _paddingValue); + transformer.MapNodeOutput(output, newNode->output); + } + + template + ell::utilities::ArchiveVersion BinaryFunctionNode::GetArchiveVersion() const + { + return { ell::utilities::ArchiveVersionNumbers::v8_port_memory_layout }; + } + + template + void BinaryFunctionNode::WriteToArchive(utilities::Archiver& archiver) const + { + model::CompilableNode::WriteToArchive(archiver); + archiver[defaultInput1PortName] << _input1; + archiver[defaultInput2PortName] << _input2; + archiver["paddingValue"] << _paddingValue; + archiver["inputLayout"] << _inputLayout; + archiver["outputLayout"] << _output.GetMemoryLayout(); + } + + template + void 
BinaryFunctionNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + model::CompilableNode::ReadFromArchive(archiver); + archiver[defaultInput1PortName] >> _input1; + archiver[defaultInput2PortName] >> _input2; + archiver["paddingValue"] >> _paddingValue; + archiver["inputLayout"] >> _inputLayout; + model::PortMemoryLayout outputLayout; + archiver["outputLayout"] >> outputLayout; + _output.SetMemoryLayout(outputLayout); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/BinaryOperationNode.h b/libraries/nodes/include/BinaryOperationNode.h index 8cb5df446..308becd46 100644 --- a/libraries/nodes/include/BinaryOperationNode.h +++ b/libraries/nodes/include/BinaryOperationNode.h @@ -151,4 +151,515 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/BinaryOperationNode.tcc" \ No newline at end of file +#pragma region implementation + +#define ADD_TO_STRING_ENTRY(NAMESPACE, OPERATOR) \ + case NAMESPACE::OPERATOR: \ + return #OPERATOR; +#define BEGIN_FROM_STRING if (false) +#define ADD_FROM_STRING_ENTRY(NAMESPACE, OPERATOR) else if (name == #OPERATOR) return NAMESPACE::OPERATOR + +namespace ell +{ +namespace nodes +{ + namespace BinaryOperations + { + inline std::string to_string(emitters::BinaryOperationType op) + { + switch (op) + { + ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, none); + ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, add); + ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, subtract); + ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, coordinatewiseMultiply); + ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, coordinatewiseDivide); + ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, logicalAnd); + ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, logicalOr); + ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, logicalXor); + default: + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown binary operation"); + } + } + + inline emitters::BinaryOperationType from_string(std::string name) + { + BEGIN_FROM_STRING; + ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, none); + ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, add); + ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, subtract); + ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, coordinatewiseMultiply); + ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, coordinatewiseDivide); + ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, logicalAnd); + ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, logicalOr); + ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, logicalXor); + + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown binary operation"); + } + + template + ValueType Add(ValueType a, ValueType b) + { + return a + b; + } + + template <> + inline bool Add(bool a, bool b) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + + template + ValueType Subtract(ValueType a, ValueType b) + { + return a - b; + } + + template <> + inline bool Subtract(bool a, bool b) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + + template + ValueType Multiply(ValueType a, ValueType b) + { + return a * b; + } + + template <> + inline bool Multiply(bool a, bool b) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + + template + ValueType Divide(ValueType a, ValueType b) + { + return a / b; + } + + template <> + 
inline bool Divide(bool a, bool b) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + + // + // Logical operations + // + template + ValueType LogicalAnd(ValueType a, ValueType b) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + + template <> + inline bool LogicalAnd(bool a, bool b) + { + return a && b; + } + + template + ValueType LogicalOr(ValueType a, ValueType b) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + + template <> + inline bool LogicalOr(bool a, bool b) + { + return a || b; + } + + template + ValueType LogicalXor(ValueType a, ValueType b) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch); + } + + template <> + inline bool LogicalXor(bool a, bool b) + { + return (!a) != (!b); + } + } // namespace BinaryOperations + + template + BinaryOperationNode::BinaryOperationNode() : + CompilableNode({ &_input1, &_input2 }, { &_output }), + _input1(this, {}, defaultInput1PortName), + _input2(this, {}, defaultInput2PortName), + _output(this, defaultOutputPortName, 0), + _operation(emitters::BinaryOperationType::none) + { + } + + template + BinaryOperationNode::BinaryOperationNode(const model::OutputPort& input1, const model::OutputPort& input2, emitters::BinaryOperationType operation) : + CompilableNode({ &_input1, &_input2 }, { &_output }), + _input1(this, input1, defaultInput1PortName), + _inputLayout1(input1.GetMemoryLayout()), + _input2(this, input2, defaultInput2PortName), + _inputLayout2(input2.GetMemoryLayout()), + _output(this, defaultOutputPortName, input1.GetMemoryLayout()), + _operation(operation), + _paddingValue(0) + { + if (_inputLayout1.GetActiveSize() != _inputLayout2.GetActiveSize()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Active areas must match for both inputs"); + } + } + + template + BinaryOperationNode::BinaryOperationNode(const model::OutputPort& input1, + const model::OutputPort& input2, + const model::PortMemoryLayout& layout, + emitters::BinaryOperationType operation, + ValueType padding) : + CompilableNode({ &_input1, &_input2 }, { &_output }), + _input1(this, input1, defaultInput1PortName), + _inputLayout1(layout), + _input2(this, input2, defaultInput2PortName), + _inputLayout2(layout), + _output(this, defaultOutputPortName, layout), + _operation(operation), + _paddingValue(padding) + { + } + + template + BinaryOperationNode::BinaryOperationNode(const model::OutputPort& input1, + const model::PortMemoryLayout& inputLayout1, + const model::OutputPort& input2, + const model::PortMemoryLayout& inputLayout2, + const model::PortMemoryLayout& outputLayout, + emitters::BinaryOperationType operation, + ValueType padding) : + CompilableNode({ &_input1, &_input2 }, { &_output }), + _input1(this, input1, defaultInput1PortName), + _inputLayout1(inputLayout1), + _input2(this, input2, defaultInput2PortName), + _inputLayout2(inputLayout2), + _output(this, defaultOutputPortName, outputLayout), + _operation(operation), + _paddingValue(padding) + { + if (inputLayout1.GetActiveSize() != inputLayout2.GetActiveSize()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Active areas must match for both inputs"); + } + if (inputLayout1.GetActiveSize() != outputLayout.GetActiveSize()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Input and output active areas must match"); + } + } + + template + 
template + std::vector BinaryOperationNode::ComputeOutput(Operation&& function) const + { + auto outputLayout = _output.GetMemoryLayout(); + auto outputSize = outputLayout.GetExtent().NumElements(); + auto output = std::vector(outputSize); + + const size_t prevInput1Offset = 0; + const size_t prevInput2Offset = 0; + const size_t prevOutputOffset = 0; + ComputeDimensionLoop(function, 0, output, prevInput1Offset, prevInput2Offset, prevOutputOffset); + + return output; + } + + template + void BinaryOperationNode::Compute() const + { + std::vector output; + switch (_operation) + { + case emitters::BinaryOperationType::add: + output = ComputeOutput(BinaryOperations::Add); + break; + case emitters::BinaryOperationType::subtract: + output = ComputeOutput(BinaryOperations::Subtract); + break; + case emitters::BinaryOperationType::coordinatewiseMultiply: + output = ComputeOutput(BinaryOperations::Multiply); + break; + case emitters::BinaryOperationType::coordinatewiseDivide: + output = ComputeOutput(BinaryOperations::Divide); + break; + case emitters::BinaryOperationType::logicalAnd: + output = ComputeOutput(BinaryOperations::LogicalAnd); + break; + case emitters::BinaryOperationType::logicalOr: + output = ComputeOutput(BinaryOperations::LogicalOr); + break; + case emitters::BinaryOperationType::logicalXor: + output = ComputeOutput(BinaryOperations::LogicalXor); + break; + default: + throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Unknown operation type"); + } + _output.SetOutput(output); + }; + + template + void BinaryOperationNode::Copy(model::ModelTransformer& transformer) const + { + const auto& PortElements1 = transformer.GetCorrespondingInputs(_input1); + const auto& PortElements2 = transformer.GetCorrespondingInputs(_input2); + auto outputLayout = _output.GetMemoryLayout(); + auto newNode = transformer.AddNode>(PortElements1, _inputLayout1, PortElements2, _inputLayout2, outputLayout, _operation); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void BinaryOperationNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + if (_inputLayout1.GetMemorySize() == _inputLayout2.GetMemorySize()) + { + if (!compiler.GetCompilerOptions().unrollLoops) + { + CompileLoop(compiler, function); + } + else + { + CompileExpanded(compiler, function); + } + } + else + { + emitters::LLVMValue pInput1 = compiler.EnsurePortEmitted(input1); + emitters::LLVMValue pInput2 = compiler.EnsurePortEmitted(input2); + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output, _paddingValue); + + // Call recursive function to emit nested loops + emitters::LLVMValue prevInput1DimensionOffset = nullptr; + emitters::LLVMValue prevInput2DimensionOffset = nullptr; + emitters::LLVMValue prevOutputDimensionOffset = nullptr; + EmitComputeDimensionLoop(compiler, function, 0, pInput1, pInput2, pResult, prevInput1DimensionOffset, prevInput2DimensionOffset, prevOutputDimensionOffset); + } + } + + template + void BinaryOperationNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pInput1 = compiler.EnsurePortEmitted(input1); + emitters::LLVMValue pInput2 = compiler.EnsurePortEmitted(input2); + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + auto count = input1.Size(); + function.VectorOperator(emitters::GetOperator(GetOperation()), count, pInput1, pInput2, [&pResult, &function](emitters::LLVMValue i, emitters::LLVMValue pValue) { + function.SetValueAt(pResult, 
i, pValue); + }); + } + + template + void BinaryOperationNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + auto count = input1.Size(); + for (size_t i = 0; i < count; ++i) + { + emitters::LLVMValue inputValue1 = compiler.LoadPortElementVariable(input1.GetInputElement(i)); + emitters::LLVMValue inputValue2 = compiler.LoadPortElementVariable(input2.GetInputElement(i)); + emitters::LLVMValue pOpResult = function.Operator(emitters::GetOperator(GetOperation()), inputValue1, inputValue2); + function.SetValueAt(pResult, function.Literal(i), pOpResult); + } + } + + // + // Arbitrary-depth nested loops are generated recursively. The ComputeDimensionLoop + // function emits `numDimensions` nested loops of the form: + // + // for(iz = 0; iz < sz; ++iz) + // { + // zOffset = (iz+offset[2]) * stride[2]; + // for(iy = 0; iy < sy; ++iy) + // { + // yOffset = zOffset + (iy+offset[1]) * stride[1]; + // for(ix = 0; ix < sx; ++ix) + // { + // offset = yOffset + (ix+offset[0]) * stride[0]; + // x = arr[offset]; + // val = f(x); + // output[offset] = val; + // } + // } + // } + // + + template + template + void BinaryOperationNode::ComputeDimensionLoop(Operation& function, + size_t dimension, + std::vector& output, + size_t prevInput1DimensionOffset, + size_t prevInput2DimensionOffset, + size_t prevOutputDimensionOffset) const + { + auto outputLayout = _output.GetMemoryLayout(); + const auto numDimensions = _inputLayout1.NumDimensions(); + auto&& inputStride1 = _inputLayout1.GetExtent(); + auto&& inputOffset1 = _inputLayout1.GetOffset(); + auto&& inputStride2 = _inputLayout2.GetExtent(); + auto&& inputOffset2 = _inputLayout2.GetOffset(); + auto&& inputSize = _inputLayout1.GetActiveSize(); + auto&& outputOffset = outputLayout.GetOffset(); + auto&& outputStride = outputLayout.GetExtent(); + + for (int loopIndex = 0; loopIndex < inputSize[dimension]; ++loopIndex) + { + // offset within start of this dimension = (loopIndex + offset[dimension]) + auto thisInput1DimensionInternalOffset = loopIndex + inputOffset1[dimension]; + auto thisInput2DimensionInternalOffset = loopIndex + inputOffset2[dimension]; + auto thisOutputDimensionInternalOffset = loopIndex + outputOffset[dimension]; + + size_t thisInput1DimensionOffset = thisInput1DimensionInternalOffset; + size_t thisInput2DimensionOffset = thisInput2DimensionInternalOffset; + size_t thisOutputDimensionOffset = thisOutputDimensionInternalOffset; + if (dimension != 0) + { + thisInput1DimensionOffset += prevInput1DimensionOffset * inputStride1[dimension]; + thisInput2DimensionOffset += prevInput2DimensionOffset * inputStride2[dimension]; + thisOutputDimensionOffset += prevOutputDimensionOffset * outputStride[dimension]; + } + + if (static_cast(dimension) < numDimensions - 1) + { + // Recursive call to emit nested loop + ComputeDimensionLoop(function, dimension + 1, output, thisInput1DimensionOffset, thisInput2DimensionOffset, thisOutputDimensionOffset); + } + else + { + // We're in the innermost loop --- compute the value + auto value1 = _input1[thisInput1DimensionOffset]; + auto value2 = _input2[thisInput2DimensionOffset]; + auto outputValue = function(value1, value2); + output[thisOutputDimensionOffset] = outputValue; + } + } + } + + template + void BinaryOperationNode::EmitComputeDimensionLoop(model::IRMapCompiler& compiler, + emitters::IRFunctionEmitter& function, + size_t dimension, + emitters::LLVMValue input1, + emitters::LLVMValue input2, + 
emitters::LLVMValue output, + emitters::LLVMValue prevInput1DimensionOffset, + emitters::LLVMValue prevInput2DimensionOffset, + emitters::LLVMValue prevOutputDimensionOffset) const + { + auto outputLayout = _output.GetMemoryLayout(); + const auto numDimensions = _inputLayout1.NumDimensions(); + auto&& inputStride1 = _inputLayout1.GetExtent(); + auto&& inputOffset1 = _inputLayout1.GetOffset(); + auto&& inputStride2 = _inputLayout2.GetExtent(); + auto&& inputOffset2 = _inputLayout2.GetOffset(); + auto&& inputSize = _inputLayout1.GetActiveSize(); + auto&& outputStride = outputLayout.GetExtent(); + auto&& outputOffset = outputLayout.GetOffset(); + + function.For(inputSize[dimension], [input1, input2, output, inputOffset1, inputOffset2, inputStride1, inputStride2, outputStride, outputOffset, prevInput1DimensionOffset, prevInput2DimensionOffset, prevOutputDimensionOffset, dimension, numDimensions, &compiler, this](emitters::IRFunctionEmitter& function, emitters::LLVMValue loopIndex) { + // Calculate the offset within this dimension = (loopIndex + offset[dimension]) + emitters::LLVMValue thisInput1DimensionInternalOffset = function.Operator(emitters::GetAddForValueType(), loopIndex, function.Literal(inputOffset1[dimension])); + emitters::LLVMValue thisInput2DimensionInternalOffset = function.Operator(emitters::GetAddForValueType(), loopIndex, function.Literal(inputOffset2[dimension])); + emitters::LLVMValue thisOutputDimensionInternalOffset = function.Operator(emitters::GetAddForValueType(), loopIndex, function.Literal(outputOffset[dimension])); + + // Calculate the total offset from beginning of memory: + // * if in the outermost loop, the offset into this dimension + // * otherwise, the offset into this dimension plus the previous offset scaled by the previous dimension's stride + emitters::LLVMValue thisInput1DimensionOffset = nullptr; + emitters::LLVMValue thisInput2DimensionOffset = nullptr; + emitters::LLVMValue thisOutputDimensionOffset = nullptr; + if (dimension == 0) + { + assert(prevInput1DimensionOffset == nullptr); + assert(prevInput2DimensionOffset == nullptr); + assert(prevOutputDimensionOffset == nullptr); + thisInput1DimensionOffset = thisInput1DimensionInternalOffset; + thisInput2DimensionOffset = thisInput2DimensionInternalOffset; + thisOutputDimensionOffset = thisOutputDimensionInternalOffset; + } + else + { + auto scaledInput1DimensionOffset = function.Operator(emitters::GetMultiplyForValueType(), prevInput1DimensionOffset, function.Literal(inputStride1[dimension])); + auto scaledInput2DimensionOffset = function.Operator(emitters::GetMultiplyForValueType(), prevInput2DimensionOffset, function.Literal(inputStride2[dimension])); + thisInput1DimensionOffset = function.Operator(emitters::GetAddForValueType(), scaledInput1DimensionOffset, thisInput1DimensionInternalOffset); + thisInput2DimensionOffset = function.Operator(emitters::GetAddForValueType(), scaledInput2DimensionOffset, thisInput2DimensionInternalOffset); + + auto scaledOutputDimensionOffset = function.Operator(emitters::GetMultiplyForValueType(), prevOutputDimensionOffset, function.Literal(outputStride[dimension])); + thisOutputDimensionOffset = function.Operator(emitters::GetAddForValueType(), scaledOutputDimensionOffset, thisOutputDimensionInternalOffset); + } + + if (static_cast(dimension) < numDimensions - 1) + { + // Recursive call to emit nested loop + EmitComputeDimensionLoop(compiler, function, dimension + 1, input1, input2, output, thisInput1DimensionOffset, thisInput2DimensionOffset, 
thisOutputDimensionOffset); + } + else + { + // We're in the innermost loop --- compute the value + auto value1 = function.ValueAt(input1, thisInput1DimensionOffset); + auto value2 = function.ValueAt(input2, thisInput2DimensionOffset); + auto outputValue = function.Operator(emitters::GetOperator(GetOperation()), value1, value2); + function.SetValueAt(output, thisOutputDimensionOffset, outputValue); + } + }); + } + + template + utilities::ArchiveVersion BinaryOperationNode::GetArchiveVersion() const + { + constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v7_binary_operation_active_regions }; + + return archiveVersion; + } + + template + bool BinaryOperationNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const + { + constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v7_binary_operation_active_regions }; + + return version >= archiveVersion; + } + + template + void BinaryOperationNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInput1PortName] << _input1; + archiver[defaultInput2PortName] << _input2; + archiver["inputLayout1"] << _inputLayout1; + archiver["inputLayout2"] << _inputLayout2; + archiver["operation"] << BinaryOperations::to_string(_operation); + auto outputLayout = _output.GetMemoryLayout(); + archiver["outputLayout"] << outputLayout; + archiver["padding"] << _paddingValue; + } + + template + void BinaryOperationNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInput1PortName] >> _input1; + archiver[defaultInput2PortName] >> _input2; + archiver["inputLayout1"] >> _inputLayout1; + archiver["inputLayout2"] >> _inputLayout2; + std::string operation; + archiver["operation"] >> operation; + _operation = BinaryOperations::from_string(operation); + model::PortMemoryLayout outputLayout; + archiver["outputLayout"] >> outputLayout; + _output.SetMemoryLayout(outputLayout); + archiver["padding"] >> _paddingValue; + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/BinaryPredicateNode.h b/libraries/nodes/include/BinaryPredicateNode.h index f1d1daddc..3c4ffc027 100644 --- a/libraries/nodes/include/BinaryPredicateNode.h +++ b/libraries/nodes/include/BinaryPredicateNode.h @@ -95,4 +95,229 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/BinaryPredicateNode.tcc" \ No newline at end of file +#pragma region implementation + +#define ADD_TO_STRING_ENTRY(NAMESPACE, OPERATOR) \ + case NAMESPACE::OPERATOR: \ + return #OPERATOR; +#define BEGIN_FROM_STRING if (false) +#define ADD_FROM_STRING_ENTRY(NAMESPACE, OPERATOR) else if (name == #OPERATOR) return NAMESPACE::OPERATOR + +namespace ell +{ +namespace nodes +{ + namespace BinaryPredicates + { + inline std::string to_string(emitters::BinaryPredicateType op) + { + switch (op) + { + ADD_TO_STRING_ENTRY(emitters::BinaryPredicateType, none); + ADD_TO_STRING_ENTRY(emitters::BinaryPredicateType, equal); + ADD_TO_STRING_ENTRY(emitters::BinaryPredicateType, less); + ADD_TO_STRING_ENTRY(emitters::BinaryPredicateType, greater); + ADD_TO_STRING_ENTRY(emitters::BinaryPredicateType, notEqual); + ADD_TO_STRING_ENTRY(emitters::BinaryPredicateType, lessOrEqual); + ADD_TO_STRING_ENTRY(emitters::BinaryPredicateType, greaterOrEqual); + default: + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown binary predicate"); + } 
+ } + + inline emitters::BinaryPredicateType from_string(std::string name) + { + BEGIN_FROM_STRING; + ADD_FROM_STRING_ENTRY(emitters::BinaryPredicateType, none); + ADD_FROM_STRING_ENTRY(emitters::BinaryPredicateType, equal); + ADD_FROM_STRING_ENTRY(emitters::BinaryPredicateType, less); + ADD_FROM_STRING_ENTRY(emitters::BinaryPredicateType, greater); + ADD_FROM_STRING_ENTRY(emitters::BinaryPredicateType, notEqual); + ADD_FROM_STRING_ENTRY(emitters::BinaryPredicateType, lessOrEqual); + ADD_FROM_STRING_ENTRY(emitters::BinaryPredicateType, greaterOrEqual); + + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown binary predicate"); + } + + template + bool Equal(ValueType a, ValueType b) + { + return a == b; + } + + template + bool Less(ValueType a, ValueType b) + { + return a < b; + } + + template + bool Greater(ValueType a, ValueType b) + { + return a > b; + } + + template + bool NotEqual(ValueType a, ValueType b) + { + return a != b; + } + + template + bool LessOrEqual(ValueType a, ValueType b) + { + return a <= b; + } + + template + bool GreaterOrEqual(ValueType a, ValueType b) + { + return a >= b; + } + } // namespace BinaryPredicates + + template + BinaryPredicateNode::BinaryPredicateNode() : + CompilableNode({ &_input1, &_input2 }, { &_output }), + _input1(this, {}, defaultInput1PortName), + _input2(this, {}, defaultInput2PortName), + _output(this, defaultOutputPortName, 0), + _predicate(emitters::BinaryPredicateType::none) + { + } + + template + BinaryPredicateNode::BinaryPredicateNode(const model::OutputPort& input1, const model::OutputPort& input2, emitters::BinaryPredicateType predicate) : + CompilableNode({ &_input1, &_input2 }, { &_output }), + _input1(this, input1, defaultInput1PortName), + _input2(this, input2, defaultInput2PortName), + _output(this, defaultOutputPortName, _input1.Size()), + _predicate(predicate) + { + if (input1.Size() != input2.Size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Input sizes must match"); + } + assert(input1.Size() == input2.Size()); + } + + template + template + std::vector BinaryPredicateNode::ComputeOutput(Operation&& fn) const + { + auto output = std::vector(_input1.Size()); + for (size_t index = 0; index < _input1.Size(); index++) + { + output[index] = fn(_input1[index], _input2[index]); + } + return output; + } + + template + void BinaryPredicateNode::Compute() const + { + std::vector output; + switch (_predicate) + { + case emitters::BinaryPredicateType::equal: + output = ComputeOutput(BinaryPredicates::Equal); + break; + case emitters::BinaryPredicateType::less: + output = ComputeOutput(BinaryPredicates::Less); + break; + case emitters::BinaryPredicateType::greater: + output = ComputeOutput(BinaryPredicates::Greater); + break; + case emitters::BinaryPredicateType::notEqual: + output = ComputeOutput(BinaryPredicates::NotEqual); + break; + case emitters::BinaryPredicateType::lessOrEqual: + output = ComputeOutput(BinaryPredicates::LessOrEqual); + break; + case emitters::BinaryPredicateType::greaterOrEqual: + output = ComputeOutput(BinaryPredicates::GreaterOrEqual); + break; + default: + throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Unknown predicate type"); + } + _output.SetOutput(output); + }; + + template + void BinaryPredicateNode::Copy(model::ModelTransformer& transformer) const + { + const auto& portElements1 = transformer.GetCorrespondingInputs(_input1); + const auto& portElements2 = 
transformer.GetCorrespondingInputs(_input2); + auto newNode = transformer.AddNode>(portElements1, portElements2, _predicate); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void BinaryPredicateNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + if (!compiler.GetCompilerOptions().unrollLoops) + { + CompileLoop(compiler, function); + } + else + { + CompileExpanded(compiler, function); + } + } + + template + void BinaryPredicateNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pInput1 = compiler.EnsurePortEmitted(input1); + emitters::LLVMValue pInput2 = compiler.EnsurePortEmitted(input2); + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + emitters::TypedComparison cmp = emitters::GetComparison(GetPredicate()); + + function.For(input1.Size(), [pInput1, pInput2, pResult, cmp](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { + emitters::LLVMValue inputValue1 = function.ValueAt(pInput1, i); + emitters::LLVMValue inputValue2 = function.ValueAt(pInput2, i); + emitters::LLVMValue pOpResult = function.Comparison(cmp, inputValue1, inputValue2); + // LLVM internally uses 1 bit for boolean. We use integers to store boolean results. That requires a typecast in LLVM + function.SetValueAt(pResult, i, function.CastBoolToByte(pOpResult)); + }); + } + + template + void BinaryPredicateNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + auto count = input1.Size(); + for (size_t i = 0; i < count; ++i) + { + emitters::LLVMValue inputValue1 = compiler.LoadPortElementVariable(input1.GetInputElement(i)); + emitters::LLVMValue inputValue2 = compiler.LoadPortElementVariable(input2.GetInputElement(i)); + emitters::LLVMValue pOpResult = function.Comparison(emitters::GetComparison(GetPredicate()), inputValue1, inputValue2); + function.SetValueAt(pResult, function.Literal((int)i), function.CastBoolToByte(pOpResult)); + } + } + + template + void BinaryPredicateNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInput1PortName] << _input1; + archiver[defaultInput2PortName] << _input2; + archiver["predicate"] << BinaryPredicates::to_string(_predicate); + } + + template + void BinaryPredicateNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInput1PortName] >> _input1; + archiver[defaultInput2PortName] >> _input2; + std::string predicate; + archiver["predicate"] >> predicate; + _predicate = BinaryPredicates::from_string(predicate); + _output.SetSize(_input1.Size()); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/BroadcastFunctionNode.h b/libraries/nodes/include/BroadcastFunctionNode.h index c248746ac..dde75b7c6 100644 --- a/libraries/nodes/include/BroadcastFunctionNode.h +++ b/libraries/nodes/include/BroadcastFunctionNode.h @@ -549,4 +549,771 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/BroadcastFunctionNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + // + // BroadcastUnaryFunction + // + template + ValueType BroadcastUnaryFunction::Compute(ValueType x, const std::vector& secondaryArgs) const + { + assert(secondaryArgs.size() == 0); + return Compute(x); + } + + template + emitters::LLVMValue 
BroadcastUnaryFunction::Compile(emitters::IRFunctionEmitter& function, emitters::LLVMValue x, const std::vector& secondaryArgs) const + { + assert(secondaryArgs.size() == 0); + return this->Compile(function, x); + } + + // + // BroadcastBinaryFunction + // + template + ValueType BroadcastBinaryFunction::Compute(ValueType x, const std::vector& secondaryArgs) const + { + assert(secondaryArgs.size() == 1); + return Compute(x, secondaryArgs[0]); + } + + template + emitters::LLVMValue BroadcastBinaryFunction::Compile(emitters::IRFunctionEmitter& function, emitters::LLVMValue x, const std::vector& secondaryArgs) const + { + assert(secondaryArgs.size() == 1); + return this->Compile(function, x, secondaryArgs[0]); + } + + // + // BroadcastTernaryFunction + // + template + ValueType BroadcastTernaryFunction::Compute(ValueType x, const std::vector& secondaryArgs) const + { + assert(secondaryArgs.size() == 2); + return Compute(x, secondaryArgs[0], secondaryArgs[1]); + } + + template + emitters::LLVMValue BroadcastTernaryFunction::Compile(emitters::IRFunctionEmitter& function, emitters::LLVMValue x, const std::vector& secondaryArgs) const + { + assert(secondaryArgs.size() == 2); + return this->Compile(function, x, secondaryArgs[0], secondaryArgs[1]); + } + + // + // BroadcastLinearFunction + // + template + ValueType BroadcastLinearFunction::Compute(ValueType x, ValueType scale, ValueType bias) const + { + return scale * x + bias; + } + + template + emitters::LLVMValue BroadcastLinearFunction::Compile(emitters::IRFunctionEmitter& function, emitters::LLVMValue x, emitters::LLVMValue scale, emitters::LLVMValue bias) const + { + if (scale == nullptr) // bias only + { + return function.Operator(emitters::GetAddForValueType(), x, bias); + } + else if (bias == nullptr) // scale only + { + return function.Operator(emitters::GetMultiplyForValueType(), scale, x); + } + else + { + return function.Operator(emitters::GetAddForValueType(), function.Operator(emitters::GetMultiplyForValueType(), scale, x), bias); + } + } + + // + // BroadcastFunctionNode + // + + template + BroadcastFunctionNode::BroadcastFunctionNode(const std::vector& inputs, const std::vector& outputs) : + CompilableNode(inputs, outputs), + _paddingValue(0) + { + } + + template + BroadcastFunctionNode::BroadcastFunctionNode(const std::vector& inputs, + const model::PortMemoryLayout& inputLayout, + size_t broadcastDimension, + const std::vector& outputs, + const model::PortMemoryLayout& outputLayout, + FunctionType function, + ValueType paddingValue) : + CompilableNode(inputs, outputs), + _inputLayout(inputLayout), + _broadcastDimension(broadcastDimension), + _function(function), + _paddingValue(paddingValue) + { + } + + template + model::PortMemoryLayout BroadcastFunctionNode::GetOutputMemoryLayout() const + { + return GetOutputPort(0)->GetMemoryLayout(); + } + + // + // Arbitrary-depth nested loops are generated recursively. The EmitComputeDimensionLoop + // function emits `numDimensions` nested loops of the form: + // + // for(iz = 0; iz < sz; ++iz) + // { + // zOffset = (iz+offset[2]) * stride[2]; + // for(iy = 0; iy < sy; ++iy) + // { + // yOffset = zOffset + (iy+offset[1]) * stride[1]; + // for(ix = 0; ix < sx; ++ix) + // { + // offset = yOffset + (ix+offset[0]) * stride[0]; + // x = arr[offset]; + // val = f(x); + // output[offset] = val; + // } + // } + // } + // + + // Note: secondaryValues is passed by non-const reference to avoid copies. It doesn't function as an output parameter. 
+ template + void BroadcastFunctionNode::ComputeDimensionLoop(size_t dimension, std::vector& output, size_t prevInputDimensionOffset, size_t prevOutputDimensionOffset, std::vector& secondaryValues) const + { + // Note: It should be easy to unroll the last K levels by putting a real loop here when dimension < k + // Or, instead of unrolling, vectorizing: if broadcastDimension = 1, let secondaryValue be a vector and load it one loop previous + // If broadcastDimension = outermost dimension (0), we may want to parallelize over that dimension + const auto numDimensions = NumPrimaryInputDimensions(); + auto&& inputLayout = GetInputMemoryLayout(); + auto&& inputStride = inputLayout.GetExtent(); + auto&& inputOffset = inputLayout.GetOffset(); + auto&& inputSize = inputLayout.GetActiveSize(); + auto&& outputLayout = GetOutputMemoryLayout(); + auto&& outputStride = outputLayout.GetExtent(); + auto&& outputOffset = outputLayout.GetOffset(); + auto&& primaryInput = GetPrimaryInput(); + const auto broadcastDimension = GetBroadcastDimension(); + const auto numSecondaryInputs = NumSecondaryInputs(); + + for (int loopIndex = 0; loopIndex < inputSize[dimension]; ++loopIndex) + { + // offset within start of this dimension = (loopIndex + offset[dimension]) + auto thisInputDimensionInternalOffset = loopIndex + inputOffset[dimension]; + auto thisOutputDimensionInternalOffset = loopIndex + outputOffset[dimension]; + + size_t thisInputDimensionOffset = thisInputDimensionInternalOffset; + size_t thisOutputDimensionOffset = thisOutputDimensionInternalOffset; + if (dimension != 0) + { + thisInputDimensionOffset += prevInputDimensionOffset * inputStride[dimension]; + thisOutputDimensionOffset += prevOutputDimensionOffset * outputStride[dimension]; + } + + if (dimension == broadcastDimension) + { + for (int index = 0; index < numSecondaryInputs; ++index) + { + auto&& secondaryInput = GetSecondaryInput(index); + if (IsSecondaryInputPresent(index)) + { + secondaryValues[index] = (*secondaryInput)[loopIndex]; + } + else + { + // Dubious hack to deal with linear function nodes missing a coefficient + if (std::is_same>::value && index == 0) // "scale" value, which should be 1 if not specified + { + secondaryValues[index] = static_cast(1.0); + } + else + { + secondaryValues[index] = 0; + } + } + } + } + + if (dimension < numDimensions - 1) + { + // Recursive call to emit nested loop + ComputeDimensionLoop(dimension + 1, output, thisInputDimensionOffset, thisOutputDimensionOffset, secondaryValues); + } + else + { + // We're in the innermost loop --- compute the value + auto primaryValue = primaryInput[thisInputDimensionOffset]; + auto outputValue = GetFunction().Compute(primaryValue, secondaryValues); + output[thisOutputDimensionOffset] = outputValue; + } + } + } + + // wrapper around EmitComputeDimensionLoop for use by parallel tasks + template + emitters::IRFunctionEmitter BroadcastFunctionNode::GetTaskFunction(model::IRMapCompiler& compiler, + emitters::IRFunctionEmitter& function, + const emitters::LLVMTypeList& portTypes) const + { + auto& module = function.GetModule(); + auto& emitter = module.GetIREmitter(); + auto& context = module.GetLLVMContext(); + auto int32Type = emitter.Type(emitters::VariableType::Int32); + auto voidType = llvm::Type::getVoidTy(context); + + // ASSUME dimension == 0 --- we're only parallelizing on the outermost loop + int dimension = 0; + + emitters::LLVMTypeList argTypes = portTypes; + // int numValuePorts = 2 + NumSecondaryInputs(); // primary input, secondary inputs, output + // 
argTypes.insert(argTypes.end(), numValuePorts, valuePtrType); + argTypes.insert(argTypes.end(), 2, int32Type); // begin, end + + auto taskFunction = function.GetModule().BeginFunction(utilities::to_string(GetId()) + "_task", voidType, argTypes); + { + // get stuff from arguments + auto arguments = taskFunction.Arguments().begin(); + auto primaryInput = &(*arguments++); + std::vector secondaryInputs; + std::vector secondaryValues; + for (int index = 0; index < NumSecondaryInputs(); ++index) + { + auto secondaryInput = &(*arguments++); + // if we really have an input, push it, else push a nullptr (note: we know this at compile-time) + if (IsSecondaryInputPresent(index)) + { + secondaryInputs.push_back(secondaryInput); + } + else + { + secondaryInputs.push_back(nullptr); + } + secondaryValues.push_back(nullptr); + } + auto output = &(*arguments++); + auto begin = function.LocalScalar(&(*arguments++)); + auto end = function.LocalScalar(&(*arguments++)); + auto prevInputDimensionOffset = function.LocalScalar(); + auto prevOutputDimensionOffset = function.LocalScalar(); + + EmitComputeDimensionLoop(compiler, taskFunction, dimension, begin, end, primaryInput, secondaryInputs, output, prevInputDimensionOffset, prevOutputDimensionOffset, secondaryValues); + taskFunction.Return(); + } + function.GetModule().EndFunction(); + + return taskFunction; + } + + // Note: secondaryValues is passed by non-const reference to avoid copies. It doesn't function as an output parameter. + template + void BroadcastFunctionNode::EmitComputeDimensionLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, size_t dimension, emitters::IRLocalScalar begin, emitters::IRLocalScalar end, emitters::LLVMValue primaryInput, const std::vector& secondaryInputs, emitters::LLVMValue output, emitters::IRLocalScalar prevInputDimensionOffset, emitters::IRLocalScalar prevOutputDimensionOffset, std::vector& secondaryValues) const + { + // Note: It should be easy to unroll the last K levels by putting a real loop here when dimension < k + // Or, instead of unrolling, vectorizing --- if broadcastDimension = 1, let secondaryValue be a vector and load it one loop previous + // If broadcastDimension = outermost dimension (0), we may want to parallelize over that dimension + const auto numDimensions = NumPrimaryInputDimensions(); + auto&& inputLayout = GetInputMemoryLayout(); + auto&& inputStride = inputLayout.GetExtent(); + auto&& inputOffset = inputLayout.GetOffset(); + auto&& inputSize = inputLayout.GetActiveSize(); + auto&& outputLayout = GetOutputMemoryLayout(); + auto&& outputStride = outputLayout.GetExtent(); + auto&& outputOffset = outputLayout.GetOffset(); + const auto broadcastDimension = GetBroadcastDimension(); + const auto numSecondaryInputs = NumSecondaryInputs(); + + function.For(begin, end, [dimension, numDimensions, inputSize, inputOffset, inputStride, outputOffset, outputStride, broadcastDimension, numSecondaryInputs, prevInputDimensionOffset, prevOutputDimensionOffset, primaryInput, secondaryInputs, output, &secondaryValues, &compiler, this](emitters::IRFunctionEmitter& function, auto loopIndex) { + // Calculate the offset within this dimension = (loopIndex + offset[dimension]) + auto thisInputDimensionInternalOffset = loopIndex + inputOffset[dimension]; + auto thisOutputDimensionInternalOffset = loopIndex + outputOffset[dimension]; + + // Calculate the total offset from beginning of memory: + // * if in the outermost loop, the offset into this dimension + // * otherwise, the offset into this dimension 
plus the previous offset scaled by the previous dimension's stride + auto thisInputDimensionOffset = function.LocalScalar(); + auto thisOutputDimensionOffset = function.LocalScalar(); + if (dimension == 0) + { + assert(!prevInputDimensionOffset.IsValid()); + assert(!prevOutputDimensionOffset.IsValid()); + thisInputDimensionOffset = thisInputDimensionInternalOffset; + thisOutputDimensionOffset = thisOutputDimensionInternalOffset; + } + else + { + thisInputDimensionOffset = thisInputDimensionInternalOffset + (prevInputDimensionOffset * inputStride[dimension]); + thisOutputDimensionOffset = thisOutputDimensionInternalOffset + (prevOutputDimensionOffset * outputStride[dimension]); + } + + if (dimension == broadcastDimension) + { + for (int index = 0; index < numSecondaryInputs; ++index) + { + auto&& secondaryInput = secondaryInputs[index]; + secondaryValues[index] = this->IsSecondaryInputPresent(index) ? function.ValueAt(secondaryInput, loopIndex) : nullptr; + } + } + + if (dimension < numDimensions - 1) + { + // Recursive call to emit nested loop + auto nextBegin = function.LocalScalar(0); + auto nextEnd = function.LocalScalar(inputSize[dimension + 1]); + this->EmitComputeDimensionLoop(compiler, function, dimension + 1, nextBegin, nextEnd, primaryInput, secondaryInputs, output, thisInputDimensionOffset, thisOutputDimensionOffset, secondaryValues); + } + else + { + // We're in the innermost loop --- compute the value + auto primaryValue = function.ValueAt(primaryInput, thisInputDimensionOffset); + auto outputValue = this->GetFunction().Compile(function, primaryValue, secondaryValues); + function.SetValueAt(output, thisOutputDimensionOffset, outputValue); + } + }); + } + + template + bool BroadcastFunctionNode::IsSecondaryInputPresent(int index) const + { + auto secondaryInput = GetSecondaryInput(index); + if (secondaryInput) + { + return secondaryInput->Size() > 0; + } + else + { + return false; + } + } + + template + void BroadcastFunctionNode::Compute() const + { + auto outputSize = GetOutputMemoryLayout().GetExtent().NumElements(); + auto output = std::vector(outputSize); + + const size_t prevInputOffset = 0; + const size_t prevOutputOffset = 0; + std::vector secondaryValues(NumSecondaryInputs(), static_cast(0)); + ComputeDimensionLoop(0, output, prevInputOffset, prevOutputOffset, secondaryValues); + + GetOutput().SetOutput(output); + } + + template + void BroadcastFunctionNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + const auto& compilerSettings = compiler.GetCompilerOptions(); + + auto& module = function.GetModule(); + auto& emitter = module.GetIREmitter(); + auto valueType = emitter.Type(emitters::GetVariableType()); + auto valuePtrType = valueType->getPointerTo(); + + const auto& primaryInput = GetPrimaryInput(); + auto primaryInputSize = primaryInput.Size(); + auto&& inputLayout = GetInputMemoryLayout(); + auto&& inputSize = inputLayout.GetActiveSize(); + auto secondaryInputSize = GetSecondaryInputSize(); + DEBUG_USED(secondaryInputSize); + assert(secondaryInputSize == 0 || primaryInputSize % secondaryInputSize == 0); + + emitters::LLVMValue pPrimaryInput = compiler.EnsurePortEmitted(primaryInput); + std::vector secondaryInputs; + std::vector secondaryValues; + for (int index = 0; index < NumSecondaryInputs(); ++index) + { + auto secondaryInputPort = GetSecondaryInput(index); + auto secondaryInputSize = secondaryInputPort->Size(); + emitters::LLVMValue secondaryInput = (secondaryInputSize > 0) ? 
compiler.EnsurePortEmitted(*secondaryInputPort) : function.NullPointer(valuePtrType);
+            secondaryInputs.push_back(secondaryInput);
+            secondaryValues.push_back(nullptr);
+        }
+        emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(GetOutput(), this->GetOutputPadding());
+
+        // Call recursive function to emit nested loops
+        // Note: We could just offset the input pointer once at the beginning instead of adding the offset every time through the loop
+        // Note: We can potentially fuse adjacent loops if memory is contiguous --- it can be done by preprocessing the size/stride vectors
+        bool allSecondaryInputsValid = true;
+        for (int index = 0; index < NumSecondaryInputs(); ++index)
+        {
+            if (!IsSecondaryInputPresent(index))
+            {
+                allSecondaryInputsValid = false;
+            }
+        }
+
+        const int minimumTaskSize = 4000;
+        if (compilerSettings.parallelize && allSecondaryInputsValid && primaryInputSize > 2 * minimumTaskSize)
+        {
+            // computes ceil(a/b)
+            auto CeilDiv = [](int a, int b) {
+                return (a - 1) / b + 1;
+            };
+
+            // TODO: fix up the logic for deciding how many tasks to use:
+            // we want to specify a minimum amount of work per task, and create fewer tasks
+            // if we don't have enough work.
+            auto numOuterIterations = inputSize[0];
+            const int numDesiredTasks = compilerSettings.maxThreads;
+            int taskSize = std::max(CeilDiv(primaryInputSize, numDesiredTasks), minimumTaskSize);
+            const int numTasks = std::min(CeilDiv(primaryInputSize, taskSize), compilerSettings.maxThreads);
+            taskSize = CeilDiv(numOuterIterations, numTasks);
+
+            // Ugly type-getting code to get around the type of the emitted port variables being different depending
+            // on whether the node is inlined (or something).
+            emitters::LLVMTypeList taskFunctionArgTypes{ pPrimaryInput->getType() };
+            for (auto& secondaryInput : secondaryInputs)
+            {
+                taskFunctionArgTypes.push_back(secondaryInput->getType());
+            }
+            taskFunctionArgTypes.push_back(pOutput->getType());
+
+            auto taskFunction = this->GetTaskFunction(compiler, function, taskFunctionArgTypes);
+            std::vector<std::vector<emitters::LLVMValue>> taskArgs;
+            for (int taskIndex = 0; taskIndex < numTasks; ++taskIndex)
+            {
+                auto begin = function.Literal(taskIndex * taskSize);
+                auto end = function.Literal(std::min((taskIndex + 1) * taskSize, numOuterIterations));
+
+                std::vector<emitters::LLVMValue> args{ pPrimaryInput };
+                args.insert(args.end(), secondaryInputs.begin(), secondaryInputs.end());
+                args.insert(args.end(), { pOutput, begin, end });
+                taskArgs.push_back(args); // record this task's arguments; without this, StartTasks is given an empty list
+            }
+            auto tasks = function.StartTasks(taskFunction, taskArgs);
+            tasks.WaitAll(function);
+        }
+        else
+        {
+            auto prevInputDimensionOffset = function.LocalScalar();
+            auto prevOutputDimensionOffset = function.LocalScalar();
+            auto begin = function.LocalScalar(0);
+            auto end = function.LocalScalar(inputSize[0]);
+            EmitComputeDimensionLoop(compiler, function, 0, begin, end, pPrimaryInput, secondaryInputs, pOutput, prevInputDimensionOffset, prevOutputDimensionOffset, secondaryValues);
+        }
+    }
+
+    template <typename ValueType, typename FunctionType>
+    void BroadcastFunctionNode<ValueType, FunctionType>::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        model::CompilableNode::WriteToArchive(archiver);
+
+        archiver["inputLayout"] << _inputLayout;
+        archiver["outputLayout"] << GetOutputMemoryLayout();
+        archiver["broadcastDimension"] << _broadcastDimension;
+        archiver["paddingValue"] << _paddingValue;
+    }
+
+    template <typename ValueType, typename FunctionType>
+    void BroadcastFunctionNode<ValueType, FunctionType>::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        model::CompilableNode::ReadFromArchive(archiver);
+
+        archiver["inputLayout"] >> _inputLayout;
+        model::PortMemoryLayout outputLayout;
+        archiver["outputLayout"] >> outputLayout;
+        auto outputs = 
GetOutputPorts(); + for (auto p : outputs) + { + p->SetMemoryLayout(outputLayout); + } + archiver["broadcastDimension"] >> _broadcastDimension; + archiver["paddingValue"] >> _paddingValue; + } + + // + // BroadcastUnaryFunctionNode + // + template + BroadcastUnaryFunctionNode::BroadcastUnaryFunctionNode() : + BroadcastFunctionNode({ &_primaryInput }, { &_output }), + _primaryInput(this, {}, primaryInputPortName), + _output(this, ell::model::Node::defaultOutputPortName, 0) + { + } + + template + BroadcastUnaryFunctionNode::BroadcastUnaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::PortMemoryLayout& outputLayout, ValueType paddingValue) : + BroadcastUnaryFunctionNode(primaryInput, inputLayout, outputLayout, FunctionType{}, paddingValue) + { + } + + template + BroadcastUnaryFunctionNode::BroadcastUnaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::PortMemoryLayout& outputLayout, FunctionType function, ValueType paddingValue) : + BroadcastFunctionNode({ &_primaryInput }, inputLayout, 0, { &_output }, outputLayout, function, paddingValue), + _primaryInput(this, primaryInput, primaryInputPortName), + _output(this, ell::model::Node::defaultOutputPortName, outputLayout) + { + // Verify sizes are compatible + size_t totalInputSize = inputLayout.GetMemorySize(); + if (primaryInput.Size() < totalInputSize) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Primary input too small"); + } + } + + template + void BroadcastUnaryFunctionNode::Copy(model::ModelTransformer& transformer) const + { + const auto& primaryInputElements = transformer.GetCorrespondingInputs(_primaryInput); + auto broadcastFunction = GetFunction(); + auto newNode = transformer.AddNode>(primaryInputElements, + this->GetInputMemoryLayout(), + this->GetOutputMemoryLayout(), + broadcastFunction); + transformer.MapNodeOutput(output, newNode->output); + } + + template + utilities::ArchiveVersion BroadcastUnaryFunctionNode::GetArchiveVersion() const + { + constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v5_refined_nodes }; + + return archiveVersion; + } + + template + bool BroadcastUnaryFunctionNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const + { + constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v5_refined_nodes }; + + return version >= archiveVersion; + } + + template + void BroadcastUnaryFunctionNode::WriteToArchive(utilities::Archiver& archiver) const + { + BroadcastFunctionNode::WriteToArchive(archiver); + archiver[primaryInputPortName] << _primaryInput; + } + + template + void BroadcastUnaryFunctionNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + BroadcastFunctionNode::ReadFromArchive(archiver); + archiver[primaryInputPortName] >> _primaryInput; + } + + template + const model::InputPort* BroadcastUnaryFunctionNode::GetSecondaryInput(int index) const + { + assert(index == 0); + return nullptr; + } + + // + // BroadcastBinaryFunctionNode + // + template + BroadcastBinaryFunctionNode::BroadcastBinaryFunctionNode() : + BroadcastFunctionNode({ &_primaryInput, &_secondaryInput }, { &_output }), + _primaryInput(this, {}, primaryInputPortName), + _secondaryInput(this, {}, secondaryInputPortName), + _output(this, ell::model::Node::defaultOutputPortName, 0) + { + } + + template + BroadcastBinaryFunctionNode::BroadcastBinaryFunctionNode(const model::OutputPort& 
primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& secondaryInput, size_t dimension, const model::PortMemoryLayout& outputLayout, ValueType paddingValue) : + BroadcastBinaryFunctionNode(primaryInput, inputLayout, secondaryInput, dimension, outputLayout, FunctionType{}, paddingValue) + { + } + + template + BroadcastBinaryFunctionNode::BroadcastBinaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& secondaryInput, size_t dimension, const model::PortMemoryLayout& outputLayout, FunctionType function, ValueType paddingValue) : + BroadcastFunctionNode({ &_primaryInput, &_secondaryInput }, inputLayout, dimension, { &_output }, outputLayout, function, paddingValue), + _primaryInput(this, primaryInput, primaryInputPortName), + _secondaryInput(this, secondaryInput, secondaryInputPortName), + _output(this, ell::model::Node::defaultOutputPortName, outputLayout) + { + // Verify sizes are compatible + size_t totalInputSize = inputLayout.GetMemorySize(); + if (primaryInput.Size() < totalInputSize) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Primary input too small"); + } + + if (secondaryInput.Size() != inputLayout.GetActiveSize(dimension)) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Broadcast vector size doesn't match input"); + } + } + + template + void BroadcastBinaryFunctionNode::Copy(model::ModelTransformer& transformer) const + { + const auto& primaryInputElements = transformer.GetCorrespondingInputs(_primaryInput); + const auto& secondaryInputElements = transformer.GetCorrespondingInputs(_secondaryInput); + auto newNode = transformer.AddNode>(primaryInputElements, + this->GetInputMemoryLayout(), + secondaryInputElements, + this->GetBroadcastDimension(), + this->GetOutputMemoryLayout(), + GetFunction()); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void BroadcastBinaryFunctionNode::WriteToArchive(utilities::Archiver& archiver) const + { + BroadcastFunctionNode::WriteToArchive(archiver); + archiver[primaryInputPortName] << _primaryInput; + archiver[secondaryInputPortName] << _secondaryInput; + } + + template + void BroadcastBinaryFunctionNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + BroadcastFunctionNode::ReadFromArchive(archiver); + archiver[primaryInputPortName] >> _primaryInput; + archiver[secondaryInputPortName] >> _secondaryInput; + } + + template + const model::InputPort* BroadcastBinaryFunctionNode::GetSecondaryInput(int index) const + { + assert(index == 0); + return &_secondaryInput; + } + + // + // BroadcastTernaryFunctionNode + // + template + BroadcastTernaryFunctionNode::BroadcastTernaryFunctionNode() : + BroadcastFunctionNode({ &_primaryInput, &_secondaryInput1, &_secondaryInput2 }, { &_output }), + _primaryInput(this, {}, primaryInputPortName), + _secondaryInput1(this, {}, secondaryInput1PortName), + _secondaryInput2(this, {}, secondaryInput2PortName), + _output(this, ell::model::Node::defaultOutputPortName, 0) + { + } + + template + BroadcastTernaryFunctionNode::BroadcastTernaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& secondaryInput1, const model::OutputPort& secondaryInput2, size_t dimension, const model::PortMemoryLayout& outputLayout, ValueType paddingValue) : + BroadcastTernaryFunctionNode(primaryInput, inputLayout, secondaryInput1, secondaryInput2, dimension, 
outputLayout, FunctionType{}, paddingValue) + { + } + + template + BroadcastTernaryFunctionNode::BroadcastTernaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& secondaryInput1, const model::OutputPort& secondaryInput2, size_t dimension, const model::PortMemoryLayout& outputLayout, FunctionType function, ValueType paddingValue) : + BroadcastFunctionNode({ &_primaryInput, &_secondaryInput1, &_secondaryInput2 }, inputLayout, dimension, { &_output }, outputLayout, function, paddingValue), + _primaryInput(this, primaryInput, primaryInputPortName), + _secondaryInput1(this, secondaryInput1, secondaryInput1PortName), + _secondaryInput2(this, secondaryInput2, secondaryInput2PortName), + _output(this, ell::model::Node::defaultOutputPortName, outputLayout) + { + // Verify sizes are compatible + size_t totalInputSize = inputLayout.GetMemorySize(); + if (primaryInput.Size() < totalInputSize) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Primary input too small"); + } + + if (std::max(secondaryInput1.Size(), secondaryInput2.Size()) != static_cast(inputLayout.GetActiveSize(dimension))) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, std::string("Broadcast vector size doesn't match input") + "_" + std::to_string(secondaryInput1.Size()) + "_" + std::to_string(secondaryInput2.Size()) + "_" + std::to_string(inputLayout.GetActiveSize(dimension))); + } + + if (secondaryInput1.Size() != secondaryInput2.Size() && secondaryInput1.Size() > 0 && secondaryInput2.Size() > 0) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "If present, secondary inputs must have the same size"); + } + + if (inputLayout.GetActiveSize() != outputLayout.GetActiveSize()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "BroadcastFunctionNode: Input and output active area sizes don't match"); + } + } + + template + void BroadcastTernaryFunctionNode::Copy(model::ModelTransformer& transformer) const + { + const auto& primaryInputElements = transformer.GetCorrespondingInputs(_primaryInput); + const auto& secondaryInput1Elements = transformer.GetCorrespondingInputs(_secondaryInput1); + const auto& secondaryInput2Elements = transformer.GetCorrespondingInputs(_secondaryInput2); + auto newNode = transformer.AddNode>(primaryInputElements, + this->GetInputMemoryLayout(), + secondaryInput1Elements, + secondaryInput2Elements, + this->GetBroadcastDimension(), + this->GetOutputMemoryLayout(), + GetFunction()); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void BroadcastTernaryFunctionNode::WriteToArchive(utilities::Archiver& archiver) const + { + BroadcastFunctionNode::WriteToArchive(archiver); + archiver[primaryInputPortName] << _primaryInput; + archiver[secondaryInput1PortName] << _secondaryInput1; + archiver[secondaryInput2PortName] << _secondaryInput2; + } + + template + void BroadcastTernaryFunctionNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + BroadcastFunctionNode::ReadFromArchive(archiver); + archiver[primaryInputPortName] >> _primaryInput; + archiver[secondaryInput1PortName] >> _secondaryInput1; + archiver[secondaryInput2PortName] >> _secondaryInput2; + } + + template + const model::InputPort* BroadcastTernaryFunctionNode::GetSecondaryInput(int index) const + { + assert(index < 2); + if (index == 0) + { + return &secondaryInput1; + } + else if (index == 1) + { + return 
&secondaryInput2; + } + return nullptr; + } + + // + // BroadcastLinearFunctionNode + // + template + BroadcastLinearFunctionNode::BroadcastLinearFunctionNode() : + BroadcastTernaryFunctionNode>() + { + } + + template + BroadcastLinearFunctionNode::BroadcastLinearFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& scaleInput, const model::OutputPort& biasInput, size_t dimension, const model::PortMemoryLayout& outputLayout, ValueType paddingValue) : + BroadcastTernaryFunctionNode>(primaryInput, inputLayout, scaleInput, biasInput, dimension, outputLayout, paddingValue) + { + } + + template + void BroadcastLinearFunctionNode::Copy(model::ModelTransformer& transformer) const + { + const auto& primaryInputElements = transformer.GetCorrespondingInputs(primaryInput); + const auto& scaleInputElements = transformer.GetCorrespondingInputs(secondaryInput1); + const auto& biasInputElements = transformer.GetCorrespondingInputs(secondaryInput2); + auto newNode = transformer.AddNode>(primaryInputElements, + this->GetInputMemoryLayout(), + scaleInputElements, + biasInputElements, + this->GetBroadcastDimension(), + this->GetOutputMemoryLayout()); + transformer.MapNodeOutput(output, newNode->output); + } + +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/BufferNode.h b/libraries/nodes/include/BufferNode.h index 9c3615ee7..e55dfc2ff 100644 --- a/libraries/nodes/include/BufferNode.h +++ b/libraries/nodes/include/BufferNode.h @@ -83,4 +83,103 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/BufferNode.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + BufferNode::BufferNode(const model::OutputPort& input, size_t windowSize) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, windowSize), + _windowSize(windowSize) + { + _samples.resize(windowSize); + } + + template + BufferNode::BufferNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0), + _windowSize(0) + { + } + + template + void BufferNode::Compute() const + { + auto inputSize = input.Size(); + if (inputSize > _samples.size()) + { + inputSize = _samples.size(); + } + auto offset = _samples.size() - inputSize; + if (offset > 0) + { + // Copy samples forward to make room for new samples + std::copy_n(_samples.begin() + offset, inputSize, _samples.begin()); + } + // Copy input samples to tail + for (size_t index = 0; index < inputSize; ++index) + { + _samples[index + offset] = _input[index]; + } + _output.SetOutput(_samples); + }; + + template + void BufferNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, _windowSize); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void BufferNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + int inputSize = input.Size(); + size_t windowSize = this->GetWindowSize(); + auto offset = windowSize - inputSize; + + emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); + auto bufferVar = function.GetModule().Variables().AddVectorVariable(emitters::VariableScope::global, windowSize); + function.GetModule().AllocateVariable(*bufferVar); + 
emitters::LLVMValue buffer = function.GetModule().EnsureEmitted(*bufferVar); + + // Copy samples forward to make room for new samples + function.MemoryMove(buffer, offset, 0, inputSize); + + // Copy input samples to tail + function.MemoryCopy(pInput, 0, buffer, offset, inputSize); + + // Copy to output + emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(output); + function.MemoryCopy(buffer, 0, pOutput, 0, windowSize); + } + + template + void BufferNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["windowSize"] << _windowSize; + } + + template + void BufferNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["windowSize"] >> _windowSize; + + _samples.resize(_windowSize); + _output.SetSize(_windowSize); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/ConcatenationNode.h b/libraries/nodes/include/ConcatenationNode.h index bfa520510..df9f38d17 100644 --- a/libraries/nodes/include/ConcatenationNode.h +++ b/libraries/nodes/include/ConcatenationNode.h @@ -85,4 +85,92 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/ConcatenationNode.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + ConcatenationNode::ConcatenationNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0){}; + + template + ConcatenationNode::ConcatenationNode(const model::OutputPort& input) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, input.Size()){}; + + template + ConcatenationNode::ConcatenationNode(const model::OutputPort& input, const model::MemoryShape& shape) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, shape){}; + + template + void ConcatenationNode::Compute() const + { + _output.SetOutput(_input.GetValue()); + } + + template + void ConcatenationNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + assert(GetPortVariableType(_input) == GetPortVariableType(_output)); + + auto inputIsInputNode = (dynamic_cast(_input.GetInputElement(0).ReferencedPort()->GetNode()) != nullptr); + // TODO: re-enable this branch when scalar port bug is fixed + if (_input.Size() != 1 && _output.Size() != 1 && !inputIsInputNode && false) + { + auto pVar = compiler.GetVariableForPort(_input.GetReferencedPort()); + compiler.SetVariableForPort(_output, pVar); + } + else + { + auto input = function.LocalArray(compiler.EnsurePortEmitted(_input)); + auto output = function.LocalArray(compiler.EnsurePortEmitted(_output)); + // check if the output variable is null. 
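+            // Editor's note (added commentary, not in the original source): the "&& false" above
+            // deliberately disables the variable-aliasing fast path until the scalar-port bug from
+            // the TODO is fixed, so every instance currently takes this element-wise copy branch;
+            // the null comparison below just skips the copy when no output buffer was emitted.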
+ function.If(ell::emitters::TypedComparison::notEquals, output, function.NullPointer(output.value->getType()->getPointerElementType()->getPointerTo()), [input, output, this](emitters::IRFunctionEmitter& function) { + auto size = _input.Size(); + function.For(size, [input, output](emitters::IRFunctionEmitter& function, auto i) { + output[i] = input[i]; + }); + }); + } + } + + template + void ConcatenationNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, GetShape()); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void ConcatenationNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver[shapeName] << GetShape().ToVector(); + } + + template + void ConcatenationNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + std::vector shapeVector; + archiver[shapeName] >> shapeVector; + _output.SetSize(_input.Size()); + if (shapeVector.size() >= 3) + { + SetShape({ shapeVector }); + } + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/ConstantNode.h b/libraries/nodes/include/ConstantNode.h index 29150b444..b5da34e21 100644 --- a/libraries/nodes/include/ConstantNode.h +++ b/libraries/nodes/include/ConstantNode.h @@ -111,4 +111,105 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/ConstantNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + // superclass (Node) constructor takes two array arguments: inputs and outputs. These are pointers to our local InputPort and OutputPort storage. 
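+    // Editor's note: hypothetical usage sketch (API spelling assumed, values invented):
+    //
+    //     model::Model model;
+    //     auto* scalar = model.AddNode<ConstantNode<double>>(42.0);                           // output size 1
+    //     auto* vector = model.AddNode<ConstantNode<double>>(std::vector<double>{ 1, 2, 3 }); // output size 3
+    //
+    // Compute() then simply republishes the stored _values on the output port.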
+ + // Default constructor + template + ConstantNode::ConstantNode() : + CompilableNode({}, { &_output }), + _output(this, defaultOutputPortName, 0){}; + + // Constructor for a scalar constant + template + ConstantNode::ConstantNode(ValueType value) : + CompilableNode({}, { &_output }), + _output(this, defaultOutputPortName, 1), + _values({ value }){}; + + // Constructor for a vector constant + template + ConstantNode::ConstantNode(const std::vector& values) : + CompilableNode({}, { &_output }), + _output(this, defaultOutputPortName, values.size()), + _values(values){}; + + template + ConstantNode::ConstantNode(const std::vector& values, const model::MemoryShape& shape) : + CompilableNode({}, { &_output }), + _output(this, defaultOutputPortName, shape), + _values(values){}; + + template + ConstantNode::ConstantNode(const std::vector& values, const model::PortMemoryLayout& layout) : + CompilableNode({}, { &_output }), + _output(this, defaultOutputPortName, layout), + _values(values){}; + + template + void ConstantNode::Compute() const + { + _output.SetOutput(_values); + } + + template + void ConstantNode::Copy(model::ModelTransformer& transformer) const + { + auto newNode = transformer.AddNode>(_values, _output.GetMemoryLayout().GetActiveSize()); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void ConstantNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + auto values = this->GetValues(); + emitters::Variable* pVar = nullptr; + pVar = function.GetModule().Variables().AddVariable>(values); + compiler.SetVariableForPort(output, pVar); // Just set the variable corresponding to the output port to be the global variable we created + } + + template + utilities::ArchiveVersion ConstantNode::GetArchiveVersion() const + { + return utilities::ArchiveVersionNumbers::v8_port_memory_layout; + } + + template + bool ConstantNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const + { + return version <= utilities::ArchiveVersionNumbers::v8_port_memory_layout; + } + + template + void ConstantNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver["values"] << _values; + archiver["layout"] << _output.GetMemoryLayout(); + } + + template + void ConstantNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver["values"] >> _values; + if (archiver.HasNextPropertyName("layout")) + { + model::PortMemoryLayout layout; + archiver["layout"] >> layout; + _output.SetMemoryLayout(layout); + } + else + { + _output.SetSize(_values.size()); + } + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/DTWDistanceNode.h b/libraries/nodes/include/DTWDistanceNode.h index cd7c536a3..b9c8593c0 100644 --- a/libraries/nodes/include/DTWDistanceNode.h +++ b/libraries/nodes/include/DTWDistanceNode.h @@ -93,4 +93,264 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/DTWDistanceNode.tcc" \ No newline at end of file +#pragma region implementation + +#include + +#include + +namespace ell +{ +namespace nodes +{ + namespace DTWDistanceNodeImpl + { + template + double Variance(const std::vector>& prototype) + { + double sum = 0; // sum(x) + double sumSquares = 0; // sum(x^2) + size_t size = 0; + for (const auto& vec : prototype) + { + size += vec.size(); + for (auto x : vec) + { + sum += x; + sumSquares += (x * x); + } + } + return (sumSquares - ((sum * sum) / size)) / 
size; + } + } // namespace DTWDistanceNodeImpl + + template + DTWDistanceNode::DTWDistanceNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 1), + _sampleDimension(0), + _prototypeLength(0), + _prototypeVariance(0) + { + } + + template + DTWDistanceNode::DTWDistanceNode(const model::OutputPort& input, const std::vector>& prototype) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, 1), + _prototype(prototype) + { + _sampleDimension = input.Size(); + _prototypeLength = prototype.size(); + _d.resize(_prototypeLength + 1); + _s.resize(_prototypeLength + 1); + + _prototypeVariance = DTWDistanceNodeImpl::Variance(_prototype); + // _threshold = std::sqrt(-2 * std::log(confidenceThreshold)) * _prototypeVariance; + Reset(); + } + + template + void DTWDistanceNode::Reset() + { + std::fill(_d.begin() + 1, _d.end(), std::numeric_limits::max()); + _d[0] = 0.0; + std::fill(_s.begin(), _s.end(), 0); + _currentTime = 0; + } + + template + float distance(const std::vector& a, const std::vector& b) + { + T s = 0; + for (size_t index = 0; index < a.size(); index++) + { + s += std::abs(a[index] - b[index]); + } + return static_cast(s); + } + + template + void DTWDistanceNode::Compute() const + { + std::vector input = _input.GetValue(); + auto t = ++_currentTime; + auto dLast = _d[0] = 0; + auto sLast = _s[0] = t; + + ValueType bestDist = 0; + int bestStart = 0; + for (size_t index = 1; index < _prototypeLength + 1; ++index) + { + auto d_iMinus1 = _d[index - 1]; + auto dPrev_iMinus1 = dLast; + auto dPrev_i = _d[index]; + auto s_iMinus1 = _s[index - 1]; + auto sPrev_iMinus1 = sLast; + auto sPrev_i = _s[index]; + + bestDist = d_iMinus1; + bestStart = s_iMinus1; + if (dPrev_i < bestDist) + { + bestDist = dPrev_i; + bestStart = sPrev_i; + } + if (dPrev_iMinus1 < bestDist) + { + bestDist = dPrev_iMinus1; + bestStart = sPrev_iMinus1; + } + bestDist += distance(_prototype[index - 1], input); + + _d[index] = bestDist; + _s[index] = bestStart; + } + assert(bestDist == _d[_prototypeLength]); + assert(bestStart == _s[_prototypeLength]); + auto result = bestDist / _prototypeVariance; + + // Ensure best match is between 80% and 120% of prototype length + auto timeDiff = _currentTime - bestStart; + if (timeDiff < _prototypeLength * 0.8 || timeDiff > _prototypeLength * 1.2) + { + bestDist = std::numeric_limits::max(); + } + + _output.SetOutput({ static_cast(result) }); + }; + + template + void DTWDistanceNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newinput = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newinput, _prototype); + transformer.MapNodeOutput(output, newNode->output); + } + + template + std::vector DTWDistanceNode::GetPrototypeData() const + { + std::vector result; + result.reserve(_prototypeLength * _sampleDimension); + + for (const auto& vec : _prototype) + { + result.insert(result.end(), vec.begin(), vec.end()); + } + return result; + } + + template + void DTWDistanceNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + static_assert(!std::is_same(), "Cannot instantiate boolean DTW nodes"); + + auto inputType = GetPortVariableType(_input); + assert(inputType == GetPortVariableType(_output)); + VerifyIsScalar(_output); + + auto input = function.LocalArray(compiler.EnsurePortEmitted(_input)); + auto result = compiler.EnsurePortEmitted(_output); + + // 
The prototype (constant) + emitters::Variable* pVarPrototype = function.GetModule().Variables().AddVariable>(GetPrototypeData()); + + // Global variables for the dynamic programming memory + emitters::Variable* pVarD = function.GetModule().Variables().AddVariable>(emitters::VariableScope::global, _prototypeLength + 1); + + // get global state vars + auto prototypeVector = function.LocalArray(function.GetModule().EnsureEmitted(*pVarPrototype)); + auto pD = function.LocalArray(function.GetModule().EnsureEmitted(*pVarD)); + + // incorrect usage of function.Variable --- should use IRModuleEmitter::EmitX(variable) + auto dist = function.Variable(inputType, "dist"); + auto protoIndex = function.Variable(emitters::VariableType::Int32, "i"); + auto dLast = function.Variable(inputType, "dLast"); + auto bestDist = function.Variable(inputType, "bestDist"); + + // initialize variables + function.StoreZero(protoIndex); + function.StoreZero(dLast); + + function.For(_prototypeLength, [pD, dLast, bestDist, dist, protoIndex, input, prototypeVector, this](emitters::IRFunctionEmitter& function, emitters::IRLocalScalar iMinusOne) { + auto i = iMinusOne + 1; + auto d_iMinus1 = pD[iMinusOne]; + auto dPrev_iMinus1 = function.LocalScalar(function.Load(dLast)); + auto dPrev_i = pD[i]; + + function.Store(bestDist, static_cast(d_iMinus1)); + + function.If(dPrev_i < d_iMinus1, [bestDist, dPrev_i](auto& function) { + function.Store(bestDist, static_cast(dPrev_i)); + }); + + function.If(dPrev_iMinus1 < function.Load(bestDist), [bestDist, dPrev_iMinus1](auto& function) { + function.Store(bestDist, dPrev_iMinus1); + }); + + // Get dist + function.StoreZero(dist); + function.For(_sampleDimension, [dist, protoIndex, input, prototypeVector](emitters::IRFunctionEmitter& function, auto j) { + auto inputValue = input[j]; + auto protoValue = prototypeVector[function.LocalScalar(function.Load(protoIndex))]; + auto absDiff = emitters::Abs(inputValue - protoValue); + function.OperationAndUpdate(dist, emitters::GetAddForValueType(), absDiff); + function.OperationAndUpdate(protoIndex, emitters::TypedOperator::add, function.Literal(1)); + }); + + function.OperationAndUpdate(bestDist, emitters::GetAddForValueType(), function.Load(dist)); // x += dist; + pD[i] = function.Load(bestDist); // d[i] = x; + }); + + function.Store(result, function.Load(bestDist) / function.LocalScalar(_prototypeVariance)); + } + + template + void DTWDistanceNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver[defaultOutputPortName] << _output; + // Since we know the prototype will always be rectangular, we + // archive it as a matrix here. 
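+        // Editor's note (illustrative values, not from the source): a 2x3 prototype
+        // { { 1, 2, 3 }, { 4, 5, 6 } } is archived as prototype_rows = 2, prototype_columns = 3,
+        // plus the row-major element list { 1, 2, 3, 4, 5, 6 }; ReadFromArchive then rebuilds
+        // each row via temp.GetRow(i).ToArray().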
+ auto numRows = _prototype.size(); + auto numColumns = _prototype[0].size(); + std::vector elements; + elements.reserve(numRows * numColumns); + for (const auto& row : _prototype) + { + elements.insert(elements.end(), row.begin(), row.end()); + } + archiver["prototype_rows"] << numRows; + archiver["prototype_columns"] << numColumns; + math::Matrix temp(numRows, numColumns, elements); + math::MatrixArchiver::Write(temp, "prototype", archiver); + } + + template + void DTWDistanceNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver[defaultOutputPortName] >> _output; + size_t numRows; + size_t numColumns; + archiver["prototype_rows"] >> numRows; + archiver["prototype_columns"] >> numColumns; + math::Matrix temp(numRows, numColumns); + math::MatrixArchiver::Read(temp, "prototype", archiver); + for (size_t i = 0; i < numRows; i++) + { + _prototype.emplace_back(temp.GetRow(i).ToArray()); + } + _prototypeLength = _prototype.size(); + _d.resize(_prototypeLength + 1); + _s.resize(_prototypeLength + 1); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/DebugSinkNode.h b/libraries/nodes/include/DebugSinkNode.h index edf272e7e..2418eef7a 100644 --- a/libraries/nodes/include/DebugSinkNode.h +++ b/libraries/nodes/include/DebugSinkNode.h @@ -102,4 +102,108 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/DebugSinkNode.tcc" \ No newline at end of file +#pragma region implementation + +#include +#include + +namespace ell +{ +namespace nodes +{ + template + DebugSinkNode::DebugSinkNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0), + _userData(nullptr) + { + } + + template + DebugSinkNode::DebugSinkNode(const model::OutputPort& input, DebugSinkFunction sink, const std::string& label, void* userData, const std::string& sinkFunctionName) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, _input.Size()), + _label(label), + _userData(userData), + _sinkFunctionName(sinkFunctionName), + _sink(std::move(sink)) + { + } + + template + bool DebugSinkNode::ShouldCompileInline() const + { + return true; + } + + template + void DebugSinkNode::Compute() const + { + DEBUG_THROW(_sink == nullptr, utilities::InputException(utilities::InputExceptionErrors::nullReference, "Sink function is not set")); + + auto result = EvaluateInput(); + if (result && _sink != nullptr) + { + _sink(_label, _input.GetValue(), _userData); + } + _output.SetOutput(_input.GetValue()); + } + + template + void DebugSinkNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); + auto userData = function.Pointer((char*)_userData); + + // EvaluateInput defaults to 'pass through' in base implementation, which means + // we always call the sink function + const emitters::NamedVariableTypeList parameters = { { "label", emitters::GetVariableType() }, + { "output", emitters::GetPointerType(emitters::GetVariableType()) }, + { "userData", emitters::GetVariableType() } }; + + // Callback signature: void DebugSinkNode(char* label, ValueType* array, char* userData) + function.GetModule().DeclareFunction(_sinkFunctionName, emitters::VariableType::Void, parameters); + emitters::LLVMFunction pSinkFunction = 
function.GetModule().GetFunction(_sinkFunctionName); + function.Call(pSinkFunction, { function.Literal(_label), function.PointerOffset(pInput, function.Literal(0)), userData }); + + // Tag the sink function as a callback that is emitted in headers + function.IncludeInHeader(); + } + + template + void DebugSinkNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, _sink, _label, _userData, _sinkFunctionName); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void DebugSinkNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["sinkFunctionName"] << _sinkFunctionName; + } + + template + void DebugSinkNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["sinkFunctionName"] >> _sinkFunctionName; + // _sink needs to be set separately + } + + template + bool DebugSinkNode::EvaluateInput() const + { + // Default pass through (derived classes will override). + return true; + } +}; // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/DelayNode.h b/libraries/nodes/include/DelayNode.h index 2238189d7..89c9b1178 100644 --- a/libraries/nodes/include/DelayNode.h +++ b/libraries/nodes/include/DelayNode.h @@ -84,4 +84,101 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/DelayNode.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + DelayNode::DelayNode(const model::OutputPort& input, size_t windowSize) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, _input.Size()), + _windowSize(windowSize) + { + auto dimension = input.Size(); + for (size_t index = 0; index < windowSize; ++index) + { + _samples.push_back(std::vector(dimension)); + } + } + + template + DelayNode::DelayNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0), + _windowSize(0) + { + } + + template + void DelayNode::Compute() const + { + auto lastBufferedSample = _samples[0]; + _samples.push_back(_input.GetValue()); + _samples.erase(_samples.begin()); + _output.SetOutput(lastBufferedSample); + }; + + template + void DelayNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, _windowSize); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void DelayNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue result = compiler.EnsurePortEmitted(output); + + size_t sampleSize = output.Size(); + size_t windowSize = this->GetWindowSize(); + size_t bufferSize = sampleSize * windowSize; + + // + // Delay nodes are always long lived - either globals or heap. Currently, we use globals + // Each sample chunk is of size == sampleSize. 
The number of chunks we hold onto == windowSize + // We need two buffers - one for the entire lot, one for the "last" chunk forwarded to the next operator + // + emitters::Variable* delayLineVar = function.GetModule().Variables().AddVariable>(emitters::VariableScope::global, bufferSize); + emitters::LLVMValue delayLine = function.GetModule().EnsureEmitted(*delayLineVar); + + // + // We implement a delay as a Shift Register + // + emitters::LLVMValue inputBuffer = compiler.EnsurePortEmitted(input); + function.ShiftAndUpdate(delayLine, bufferSize, sampleSize, inputBuffer, result); + } + + template + void DelayNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["windowSize"] << _windowSize; + } + + template + void DelayNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["windowSize"] >> _windowSize; + + auto dimension = _input.Size(); + _samples.clear(); + _samples.reserve(_windowSize); + for (size_t index = 0; index < _windowSize; ++index) + { + _samples.push_back(std::vector(dimension)); + } + _output.SetSize(dimension); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/DemultiplexerNode.h b/libraries/nodes/include/DemultiplexerNode.h index 16b25a8f8..3c364e445 100644 --- a/libraries/nodes/include/DemultiplexerNode.h +++ b/libraries/nodes/include/DemultiplexerNode.h @@ -85,4 +85,115 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/DemultiplexerNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + DemultiplexerNode::DemultiplexerNode() : + Node({ &_input, &_selector }, { &_output }), + _input(this, {}, defaultInputPortName), + _selector(this, {}, selectorPortName), + _output(this, defaultOutputPortName, 0), + _defaultValue(0) + { + } + + template + DemultiplexerNode::DemultiplexerNode(const model::OutputPort& input, const model::OutputPort& selector, size_t outputSize, ValueType defaultValue) : + Node({ &_input, &_selector }, { &_output }), + _input(this, input, defaultInputPortName), + _selector(this, selector, selectorPortName), + _output(this, defaultOutputPortName, outputSize), + _defaultValue(defaultValue) + { + if (selector.Size() != 1) + { + throw ell::utilities::Exception("Error: Condition must be 1-D signal"); + } + if (input.Size() != 1) + { + throw ell::utilities::Exception("Error: Input must be 1-D signal"); + } + } + + template + void DemultiplexerNode::Compute() const + { + std::vector outputValue(_output.Size(), _defaultValue); + int index = (int)_selector[0]; + outputValue[index] = _input[0]; + _output.SetOutput(outputValue); + } + + template + void DemultiplexerNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver[selectorPortName] << _selector; + archiver["size"] << _output.Size(); + archiver["defaultValue"] << _defaultValue; + } + + template + void DemultiplexerNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver[selectorPortName] >> _selector; + size_t size; + archiver["size"] >> size; + _output.SetSize(size); + archiver["defaultValue"] >> _defaultValue; + } + + template + void DemultiplexerNode::Copy(model::ModelTransformer& transformer) const + { + const 
auto& newInput = transformer.GetCorrespondingInputs(_input); + const auto& newSelector = transformer.GetCorrespondingInputs(_selector); + auto newNode = transformer.AddNode>(newInput, newSelector, output.Size(), _defaultValue); + transformer.MapNodeOutput(output, newNode->output); + } + + template + const model::OutputPort& CastIfNecessary(const model::OutputPort& values, model::ModelTransformer& transformer) + { + auto castNode = transformer.AddNode>(values); + return castNode->output; + } + + template <> + inline const model::OutputPort& CastIfNecessary(const model::OutputPort& values, model::ModelTransformer& transformer) + { + return values; + } + + template + bool DemultiplexerNode::Refine(model::ModelTransformer& transformer) const + { + const auto& newInput = transformer.GetCorrespondingInputs(_input); + const auto& newSelector = transformer.GetCorrespondingInputs(_selector); + const auto& newSelectorInt = CastIfNecessary(newSelector, transformer); + + auto defaultNode = transformer.AddNode>(_defaultValue); + model::PortElements outputElements; + auto size = _output.Size(); + for (size_t index = 0; index < size; ++index) + { + auto indexNode = transformer.AddNode>(static_cast(index)); + auto isEqualNode = transformer.AddNode>(newSelectorInt, indexNode->output, emitters::BinaryPredicateType::equal); + auto ifNode = transformer.AddNode>(model::PortElements{ defaultNode->output, newInput }, isEqualNode->output); + outputElements.Append(ifNode->output); + } + + transformer.MapNodeOutput(output, outputElements); + return true; + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/DotProductNode.h b/libraries/nodes/include/DotProductNode.h index 0cebe9bac..23b79aed8 100644 --- a/libraries/nodes/include/DotProductNode.h +++ b/libraries/nodes/include/DotProductNode.h @@ -87,4 +87,125 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/DotProductNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + DotProductNode::DotProductNode() : + CompilableNode({ &_input1, &_input2 }, { &_output }), + _input1(this, {}, defaultInput1PortName), + _input2(this, {}, defaultInput2PortName), + _output(this, defaultOutputPortName, 1) + { + } + + template + DotProductNode::DotProductNode(const model::OutputPort& input1, const model::OutputPort& input2) : + CompilableNode({ &_input1, &_input2 }, { &_output }), + _input1(this, input1, defaultInput1PortName), + _input2(this, input2, defaultInput2PortName), + _output(this, defaultOutputPortName, 1) + { + } + + template + void DotProductNode::Compute() const + { + ValueType result = 0; + for (size_t index = 0; index < _input1.Size(); ++index) + { + result += _input1[index] * _input2[index]; + } + _output.SetOutput({ result }); + }; + + template + void DotProductNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newInput1 = transformer.GetCorrespondingInputs(_input1); + const auto& newInput2 = transformer.GetCorrespondingInputs(_input2); + auto newNode = transformer.AddNode>(newInput1, newInput2); + transformer.MapNodeOutput(output, newNode->output); + } + + template + bool DotProductNode::Refine(model::ModelTransformer& transformer) const + { + // Maybe... 
in reality, dot product will likely want to be computed as in Compute() above + const auto& newInput1 = transformer.GetCorrespondingInputs(_input1); + const auto& newInput2 = transformer.GetCorrespondingInputs(_input2); + auto multNode = transformer.AddNode>(newInput1, newInput2, emitters::BinaryOperationType::coordinatewiseMultiply); + auto sumNode = transformer.AddNode>(multNode->output); + + transformer.MapNodeOutput(output, sumNode->output); + return true; + } + + template + void DotProductNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + static_assert(!std::is_same(), "Cannot instantiate boolean dot product nodes"); + if (!compiler.GetCompilerOptions().unrollLoops) + { + CompileDotProductLoop(compiler, function); + } + else + { + CompileDotProductExpanded(compiler, function); + } + } + + template + void DotProductNode::CompileDotProductLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pLVector = compiler.EnsurePortEmitted(input1); + emitters::LLVMValue pRVector = compiler.EnsurePortEmitted(input2); + int count = static_cast(input1.Size()); + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + if (compiler.GetCompilerOptions().inlineOperators) + { + function.DotProduct(count, pLVector, pRVector, pResult); + } + else + { + function.Call(function.GetModule().GetRuntime().GetDotProductFunction(), { function.Literal(count), function.PointerOffset(pLVector, 0), function.PointerOffset(pRVector, 0), function.PointerOffset(pResult, 0) }); + } + } + + template + void DotProductNode::CompileDotProductExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + function.StoreZero(pResult); + for (size_t i = 0; i < input1.Size(); ++i) + { + emitters::LLVMValue pLeftValue = compiler.LoadPortElementVariable(input1.GetInputElement(i)); + emitters::LLVMValue pRightValue = compiler.LoadPortElementVariable(input2.GetInputElement(i)); + emitters::LLVMValue pMultiplyResult = function.Operator(emitters::GetMultiplyForValueType(), pLeftValue, pRightValue); + function.OperationAndUpdate(pResult, emitters::GetAddForValueType(), pMultiplyResult); + } + } + + template + void DotProductNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInput1PortName] << _input1; + archiver[defaultInput2PortName] << _input2; + } + + template + void DotProductNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInput1PortName] >> _input1; + archiver[defaultInput2PortName] >> _input2; + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/ExtremalValueNode.h b/libraries/nodes/include/ExtremalValueNode.h index 2a4b14e18..8e47a6b0e 100644 --- a/libraries/nodes/include/ExtremalValueNode.h +++ b/libraries/nodes/include/ExtremalValueNode.h @@ -148,4 +148,185 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/ExtremalValueNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + ExtremalValueNode::ExtremalValueNode() : + CompilableNode({ &_input }, { &_val, &_argVal }), + _input(this, {}, inputPortName), + _val(this, valPortName, 1), + _argVal(this, argValPortName, 1) + { + } + + template + ExtremalValueNode::ExtremalValueNode(const model::OutputPort& input) : + CompilableNode({ &_input }, { 
&_val, &_argVal }), + _input(this, input, inputPortName), + _val(this, valPortName, 1), + _argVal(this, argValPortName, 1) + { + } + + template + std::string ExtremalValueNode::GetTypeName() + { + if (max) + { + return utilities::GetCompositeTypeName("ExtremalValueNode"); + } + else + { + return utilities::GetCompositeTypeName("ExtremalValueNode"); + } + } + + template + void ExtremalValueNode::Compute() const + { + auto inputValues = _input.GetValue(); + decltype(std::max_element(inputValues.begin(), inputValues.end())) result; + if (max) + { + result = std::max_element(inputValues.begin(), inputValues.end()); + } + else + { + result = std::min_element(inputValues.begin(), inputValues.end()); + } + auto val = *result; + auto index = result - inputValues.begin(); + _val.SetOutput({ val }); + _argVal.SetOutput({ (int)index }); + }; + + template + emitters::TypedComparison ExtremalValueNode::GetComparison() const + { + if (IsMaxNode()) + { + return emitters::GetComparison(emitters::BinaryPredicateType::greater); + } + else + { + return emitters::GetComparison(emitters::BinaryPredicateType::less); + } + } + + template + void ExtremalValueNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + VerifyIsScalar(val); + VerifyIsScalar(argVal); + if (!compiler.GetCompilerOptions().unrollLoops) + { + CompileLoop(compiler, function); + } + else + { + CompileExpanded(compiler, function); + } + } + + template + void ExtremalValueNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue inputVal = compiler.EnsurePortEmitted(input); + emitters::LLVMValue outVal = compiler.EnsurePortEmitted(val); + emitters::LLVMValue outArgVal = compiler.EnsurePortEmitted(argVal); + auto inputType = GetPortVariableType(input); + auto numInputs = input.Size(); + + emitters::LLVMValue bestVal = function.Variable(inputType, "bestVal"); + emitters::LLVMValue bestIndex = function.Variable(ell::emitters::VariableType::Int32, "bestArgVal"); + + auto val0 = function.ValueAt(inputVal, function.Literal(0)); + function.Store(bestVal, val0); + function.StoreZero(bestIndex); + + function.For(1, numInputs, 1, [inputVal, bestVal, bestIndex, this](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { + auto val = function.ValueAt(inputVal, i); + function.If(GetComparison(), val, function.Load(bestVal), [bestVal, bestIndex, val, i](auto& function) { + function.Store(bestVal, val); + function.Store(bestIndex, i); + }); + }); + + function.Store(outVal, function.Load(bestVal)); + function.Store(outArgVal, function.Load(bestIndex)); + } + + template + void ExtremalValueNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue outVal = compiler.EnsurePortEmitted(val); + emitters::LLVMValue outArgVal = compiler.EnsurePortEmitted(argVal); + auto inputType = GetPortVariableType(input); + auto numInputs = input.Size(); + + emitters::LLVMValue bestVal = function.Variable(inputType, "bestVal"); + emitters::LLVMValue bestIndex = function.Variable(ell::emitters::VariableType::Int32, "bestArgVal"); + + emitters::LLVMValue val0 = compiler.LoadPortElementVariable(input.GetInputElement(0)); + function.Store(bestVal, val0); + function.StoreZero(bestIndex); + + for (size_t i = 1; i < numInputs; ++i) + { + emitters::LLVMValue val = compiler.LoadPortElementVariable(input.GetInputElement(i)); + function.If(GetComparison(), val, function.Load(bestVal), [bestVal, bestIndex, val, i](auto& function) { + 
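For orientation: the loop CompileLoop is emitting here is the standard single-pass extremal scan, seeding the best value/index from element 0 and comparing each later element. A plain-C++ rendering of what the generated code computes (a standalone sketch, not ELL API; assumes a non-empty input):

```cpp
#include <cstddef>
#include <utility>
#include <vector>

// Sketch of the emitted scan: returns {bestValue, bestIndex}.
// `max` selects between an argmax (>) and an argmin (<) comparison.
template <typename ValueType>
std::pair<ValueType, int> ExtremalValue(const std::vector<ValueType>& input, bool max)
{
    ValueType bestVal = input[0]; // assumes input is non-empty
    int bestIndex = 0;
    for (std::size_t i = 1; i < input.size(); ++i)
    {
        bool better = max ? (input[i] > bestVal) : (input[i] < bestVal);
        if (better)
        {
            bestVal = input[i];
            bestIndex = static_cast<int>(i);
        }
    }
    return { bestVal, bestIndex };
}
```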
function.Store(bestVal, val); + function.Store(bestIndex, function.Literal(static_cast(i))); + }); + } + + function.Store(outVal, function.Load(bestVal)); + function.Store(outArgVal, function.Load(bestIndex)); + } + + template + void ExtremalValueNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[inputPortName] << _input; + archiver[valPortName] << _val; + archiver[argValPortName] << _argVal; + } + + template + void ExtremalValueNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[inputPortName] >> _input; + archiver[valPortName] >> _val; + archiver[argValPortName] >> _argVal; + } + + // + // Copy definitions for subclasses + // + template + void ArgMinNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(this->_input); + auto newNode = transformer.AddNode>(newPortElements); + transformer.MapNodeOutput(this->val, newNode->val); + transformer.MapNodeOutput(this->argVal, newNode->argVal); + } + + template + void ArgMaxNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(this->_input); + auto newNode = transformer.AddNode>(newPortElements); + transformer.MapNodeOutput(this->val, newNode->val); + transformer.MapNodeOutput(this->argVal, newNode->argVal); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/ForestPredictorNode.h b/libraries/nodes/include/ForestPredictorNode.h index 3a9d4e4b4..a2ffe5bfb 100644 --- a/libraries/nodes/include/ForestPredictorNode.h +++ b/libraries/nodes/include/ForestPredictorNode.h @@ -23,8 +23,8 @@ #include #include -#include #include +#include #include namespace ell @@ -98,4 +98,184 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/ForestPredictorNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + ForestPredictorNode::ForestPredictorNode(const model::OutputPort& input, const predictors::ForestPredictor& forest) : + Node({ &_input }, { &_output, &_treeOutputs, &_edgeIndicatorVector }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, 1), + _treeOutputs(this, treeOutputsPortName, forest.NumTrees()), + _edgeIndicatorVector(this, edgeIndicatorVectorPortName, forest.NumEdges()), + _forest(forest) + { + } + + template + ForestPredictorNode::ForestPredictorNode() : + Node({ &_input }, { &_output, &_treeOutputs, &_edgeIndicatorVector }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 1), + _treeOutputs(this, treeOutputsPortName, 0), + _edgeIndicatorVector(this, edgeIndicatorVectorPortName, 0) + { + } + + template + void ForestPredictorNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["forest"] << _forest; + } + + template + void ForestPredictorNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["forest"] >> _forest; + + _treeOutputs.SetSize(_forest.NumTrees()); + _edgeIndicatorVector.SetSize(_forest.NumEdges()); + } + + template + void ForestPredictorNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = 
transformer.AddNode>(newPortElements, _forest); + transformer.MapNodeOutput(output, newNode->output); + transformer.MapNodeOutput(treeOutputs, newNode->treeOutputs); + transformer.MapNodeOutput(edgeIndicatorVector, newNode->edgeIndicatorVector); + } + + template + bool ForestPredictorNode::Refine(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + const auto& interiorNodes = _forest.GetInteriorNodes(); + + // create a place to store references to the output ports of the sub-models at each interior node + std::vector> interiorNodeSplitIndicators(interiorNodes.size()); + std::vector> interiorNodeSubModels(interiorNodes.size()); + + // visit interior nodes bottom-up (in reverse topological order) + for (int nodeIndex = static_cast(interiorNodes.size()) - 1; nodeIndex >= 0; --nodeIndex) // Note: index var must be signed or else end condition is never met + { + const auto& edges = interiorNodes[nodeIndex].GetOutgoingEdges(); + + // get the sub-model that represents each outgoing edge + model::PortElements edgeOutputs; + for (size_t j = 0; j < edges.size(); ++j) + { + const auto& edgePredictor = edges[j].GetPredictor(); + auto edgePredictorNode = AddNodeToModelTransformer(newPortElements, edgePredictor, transformer); + + if (edges[j].IsTargetInterior()) // target node is itself an interior node: reverse topological order guarantees that it's already visited + { + model::PortElements elements = interiorNodeSubModels[edges[j].GetTargetNodeIndex()]; + + auto sumNode = transformer.AddNode>(edgePredictorNode->output, elements, emitters::BinaryOperationType::add); + edgeOutputs.Append(sumNode->output); + } + else // target node is a leaf + { + edgeOutputs.Append(edgePredictorNode->output); + } + } + + // add the sub-model that computes the split rule + auto splitRuleNode = AddNodeToModelTransformer(newPortElements, interiorNodes[nodeIndex].GetSplitRule(), transformer); + interiorNodeSplitIndicators[nodeIndex] = { splitRuleNode->output }; + + // ...and selects the output value + auto selectorNode = transformer.AddNode>(edgeOutputs, splitRuleNode->output); + interiorNodeSubModels[nodeIndex] = { selectorNode->output }; + } + + // Now compute the edge indicator vector + auto trueNode = transformer.AddNode>(true); // the constant 'true' + std::vector> edgeIndicatorSubModels(_forest.NumEdges()); + + // Vector with index of the incoming edge for each internal node (with sentinel value of -1 for tree roots) + std::vector incomingEdgeIndices(interiorNodes.size(), -1); + for (size_t nodeIndex = 0; nodeIndex < interiorNodes.size(); ++nodeIndex) + { + auto parentEdgeIndex = incomingEdgeIndices[nodeIndex]; + auto isRoot = parentEdgeIndex == -1; + const auto& edgeSelector = interiorNodeSplitIndicators[nodeIndex]; + const auto& node = interiorNodes[nodeIndex]; + const auto& childEdges = node.GetOutgoingEdges(); + auto numChildren = childEdges.size(); + model::PortElements parentIndicator = isRoot ? trueNode->output : edgeIndicatorSubModels[parentEdgeIndex]; + + // The Demultiplexer node computes the indicator value for all the children at once, by copying its input value (a '1' if it's the root) + // to the selected child. 
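The comment above describes the key routing step: each interior node's indicator (1 on the active root-to-leaf path, 0 elsewhere) is copied to exactly one child edge, the one its split rule selected. A standalone sketch of that single step, in plain C++ with illustrative names:

```cpp
#include <cstddef>
#include <vector>

// Sketch: one demultiplexing step of the edge-indicator computation.
// parentIndicator is 1 only on the active path; selectedChild is the edge the
// split rule chose. Exactly one child edge inherits the parent's indicator.
inline std::vector<bool> RouteIndicator(bool parentIndicator, std::size_t selectedChild, std::size_t numChildren)
{
    std::vector<bool> childIndicators(numChildren, false);
    if (parentIndicator)
    {
        childIndicators[selectedChild] = true;
    }
    return childIndicators;
}
```

Applied top-down from each tree root, this marks exactly the edges on the path the input actually takes, which is what the edge indicator vector reports.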
+ auto muxNode = transformer.AddNode>(parentIndicator, edgeSelector, numChildren); + for (size_t edgePosition = 0; edgePosition < numChildren; ++edgePosition) + { + auto edgeIndex = node.GetFirstEdgeIndex() + edgePosition; + model::PortElements childOut = { muxNode->output, edgePosition }; + edgeIndicatorSubModels[edgeIndex] = childOut; + + // If this edge's target node has an outgoing edge, record ourself as its parent + if (childEdges[edgePosition].IsTargetInterior()) + { + auto childNode = childEdges[edgePosition].GetTargetNodeIndex(); + incomingEdgeIndices[childNode] = static_cast(edgeIndex); + } + } + } + // collect the individual entries for the indicator vector into a single PortElements object + model::PortElements edgeIndicatorVectorElements(edgeIndicatorSubModels); + + // collect the sub-models that represent the trees of the forest + model::PortElements treeSubModels; + for (size_t rootIndex : _forest.GetRootIndices()) + { + treeSubModels.Append(interiorNodeSubModels[rootIndex]); + } + + // Make a copy and add the bias term + auto treesPlusBias = treeSubModels; + auto biasNode = transformer.AddNode>(_forest.GetBias()); + treesPlusBias.Append(biasNode->output); + + // Sum all of the trees + auto sumNode = transformer.AddNode>(treesPlusBias); + + // Map all the outputs from the original node to the refined model outputs + transformer.MapNodeOutput(output, sumNode->output); + transformer.MapNodeOutput(treeOutputs, treeSubModels); + transformer.MapNodeOutput(edgeIndicatorVector, edgeIndicatorVectorElements); + return true; + } + + template + void ForestPredictorNode::Compute() const + { + // forest output + auto inputDataVector = typename ForestPredictor::DataVectorType(_input.GetValue()); + _output.SetOutput({ _forest.Predict(inputDataVector) }); + + // individual tree outputs + std::vector treeOutputs(_forest.NumTrees()); + for (size_t i = 0; i < _forest.NumTrees(); ++i) + { + treeOutputs[i] = _forest.Predict(inputDataVector, _forest.GetRootIndex(i)); + } + _treeOutputs.SetOutput(std::move(treeOutputs)); + + // path indicator + auto edgeIndicator = _forest.GetEdgeIndicatorVector(inputDataVector); + _edgeIndicatorVector.SetOutput(std::move(edgeIndicator)); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/HammingWindowNode.h b/libraries/nodes/include/HammingWindowNode.h index 657e59a46..fb9968138 100644 --- a/libraries/nodes/include/HammingWindowNode.h +++ b/libraries/nodes/include/HammingWindowNode.h @@ -74,4 +74,74 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/HammingWindowNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + HammingWindowNode::HammingWindowNode() : + Node({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0) + { + } + + template + HammingWindowNode::HammingWindowNode(const model::OutputPort& input) : + Node({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, input.Size()) + { + } + + template + void HammingWindowNode::Compute() const + { + auto size = _input.Size(); + auto window = dsp::HammingWindow(size); + auto result = std::vector(size); + for (size_t index = 0; index < size; index++) + { + result[index] = _input[index] * window[index]; + } + _output.SetOutput(result); + } + + template + void HammingWindowNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = 
transformer.GetCorrespondingInputs(_input);
+        auto newNode = transformer.AddNode<HammingWindowNode<ValueType>>(newPortElements);
+        transformer.MapNodeOutput(output, newNode->output);
+    }
+
+    template <typename ValueType>
+    bool HammingWindowNode<ValueType>::Refine(model::ModelTransformer& transformer) const
+    {
+        const auto& newPortElements = transformer.GetCorrespondingInputs(_input);
+        auto constantNode = transformer.AddNode<ConstantNode<ValueType>>(dsp::HammingWindow<ValueType>(_input.Size()));
+        auto multiplyNode = transformer.AddNode<BinaryOperationNode<ValueType>>(newPortElements, constantNode->output, emitters::BinaryOperationType::coordinatewiseMultiply);
+        transformer.MapNodeOutput(output, multiplyNode->output);
+        return true;
+    }
+
+    template <typename ValueType>
+    void HammingWindowNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        Node::WriteToArchive(archiver);
+        archiver[defaultInputPortName] << _input;
+    }
+
+    template <typename ValueType>
+    void HammingWindowNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        Node::ReadFromArchive(archiver);
+        archiver[defaultInputPortName] >> _input;
+        _output.SetSize(_input.Size());
+    }
+} // namespace nodes
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/nodes/include/L2NormSquaredNode.h b/libraries/nodes/include/L2NormSquaredNode.h
index 7ba067444..b41094cff 100644
--- a/libraries/nodes/include/L2NormSquaredNode.h
+++ b/libraries/nodes/include/L2NormSquaredNode.h
@@ -75,4 +75,74 @@ namespace nodes
 } // namespace nodes
 } // namespace ell
 
-#include "../tcc/L2NormSquaredNode.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace nodes
+{
+    template <typename ValueType>
+    L2NormSquaredNode<ValueType>::L2NormSquaredNode() :
+        Node({ &_input }, { &_output }),
+        _input(this, {}, defaultInputPortName),
+        _output(this, defaultOutputPortName, 1)
+    {
+    }
+
+    template <typename ValueType>
+    L2NormSquaredNode<ValueType>::L2NormSquaredNode(const model::OutputPort<ValueType>& input) :
+        Node({ &_input }, { &_output }),
+        _input(this, input, defaultInputPortName),
+        _output(this, defaultOutputPortName, 1)
+    {
+    }
+
+    template <typename ValueType>
+    void L2NormSquaredNode<ValueType>::Compute() const
+    {
+        ValueType result = 0;
+        for (size_t index = 0; index < _input.Size(); ++index)
+        {
+            auto v = _input[index];
+            result += (v * v);
+        }
+        _output.SetOutput({ result });
+    };
+
+    template <typename ValueType>
+    void L2NormSquaredNode<ValueType>::Copy(model::ModelTransformer& transformer) const
+    {
+        const auto& newPortElements = transformer.GetCorrespondingInputs(_input);
+        auto newNode = transformer.AddNode<L2NormSquaredNode<ValueType>>(newPortElements);
+        transformer.MapNodeOutput(output, newNode->output);
+    }
+
+    template <typename ValueType>
+    bool L2NormSquaredNode<ValueType>::Refine(model::ModelTransformer& transformer) const
+    {
+        const auto& newPortElements = transformer.GetCorrespondingInputs(_input);
+
+        auto squareInputNode = transformer.AddNode<UnaryOperationNode<ValueType>>(newPortElements, emitters::UnaryOperationType::square);
+        auto sumNode = transformer.AddNode<SumNode<ValueType>>(squareInputNode->output);
+
+        transformer.MapNodeOutput(output, sumNode->output);
+        return true;
+    }
+
+    template <typename ValueType>
+    void L2NormSquaredNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        Node::WriteToArchive(archiver);
+        archiver[defaultInputPortName] << _input;
+    }
+
+    template <typename ValueType>
+    void L2NormSquaredNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        Node::ReadFromArchive(archiver);
+        archiver[defaultInputPortName] >> _input;
+    }
+} // namespace nodes
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/nodes/include/LinearPredictorNode.h b/libraries/nodes/include/LinearPredictorNode.h
index 37bab995e..b73f4abfd 100644
--- a/libraries/nodes/include/LinearPredictorNode.h
+++ b/libraries/nodes/include/LinearPredictorNode.h
@@ -99,4 +99,93 @@ namespace nodes
 } //
namespace nodes } // namespace ell -#include "../tcc/LinearPredictorNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + LinearPredictorNode::LinearPredictorNode() : + Node({ &_input }, { &_output, &_weightedElements }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 1), + _weightedElements(this, weightedElementsPortName, 0) + { + } + + template + LinearPredictorNode::LinearPredictorNode(const model::OutputPort& input, const predictors::LinearPredictor& predictor) : + Node({ &_input }, { &_output, &_weightedElements }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, 1), + _weightedElements(this, weightedElementsPortName, input.Size()), + _predictor(predictor) + { + if (input.Size() != predictor.Size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "LinearPredictorNode: input size must match the predictor size"); + } + } + + template + void LinearPredictorNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["weightedElements"] << _weightedElements; + archiver["predictor"] << _predictor; + } + + template + void LinearPredictorNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["weightedElements"] >> _weightedElements; + archiver["predictor"] >> _predictor; + } + + template + void LinearPredictorNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode(newPortElements, _predictor); + transformer.MapNodeOutput(output, newNode->output); + transformer.MapNodeOutput(weightedElements, newNode->weightedElements); + } + + template + bool LinearPredictorNode::Refine(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + + auto weightsNode = transformer.AddNode>(_predictor.GetWeights().ToArray()); + auto dotProductNode = transformer.AddNode>(weightsNode->output, newPortElements); + auto coordinatewiseMultiplyNode = transformer.AddNode>(weightsNode->output, newPortElements, emitters::BinaryOperationType::coordinatewiseMultiply); + auto biasNode = transformer.AddNode>(_predictor.GetBias()); + auto addNode = transformer.AddNode>(dotProductNode->output, biasNode->output, emitters::BinaryOperationType::add); + + transformer.MapNodeOutput(output, addNode->output); + transformer.MapNodeOutput(weightedElements, coordinatewiseMultiplyNode->output); + return true; + } + + template + void LinearPredictorNode::Compute() const + { + using DataVectorType = typename LinearPredictorType::DataVectorType; + auto inputDataVector = DataVectorType(_input.GetValue()); + _output.SetOutput({ _predictor.Predict(inputDataVector) }); + _weightedElements.SetOutput(_predictor.GetWeightedElements(inputDataVector).ToArray()); + } + + template + LinearPredictorNode* AddNodeToModelTransformer(const model::PortElements& input, const predictors::LinearPredictor& predictor, model::ModelTransformer& transformer) + { + return transformer.AddNode>(input, predictor); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/MatrixVectorProductNode.h b/libraries/nodes/include/MatrixVectorProductNode.h index 3aa2df498..45d0e03ac 100644 --- 
a/libraries/nodes/include/MatrixVectorProductNode.h
+++ b/libraries/nodes/include/MatrixVectorProductNode.h
@@ -20,8 +20,8 @@
 #include
 
-#include
 #include
+#include
 
 namespace ell
 {
@@ -97,4 +97,118 @@ namespace nodes
 } // namespace nodes
 } // namespace ell
 
-#include "../tcc/MatrixVectorProductNode.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace nodes
+{
+    template <typename ValueType, math::MatrixLayout layout>
+    MatrixVectorProductNode<ValueType, layout>::MatrixVectorProductNode() :
+        Node({ &_input }, { &_output }),
+        _input(this, {}, defaultInputPortName),
+        _output(this, defaultOutputPortName, 1),
+        _w(0, 0)
+    {
+    }
+
+    template <typename ValueType, math::MatrixLayout layout>
+    MatrixVectorProductNode<ValueType, layout>::MatrixVectorProductNode(const model::OutputPort<ValueType>& input, const math::Matrix<ValueType, layout>& w) :
+        Node({ &_input }, { &_output }),
+        _input(this, input, defaultInputPortName),
+        _output(this, defaultOutputPortName, w.NumRows()),
+        _w(w)
+    {
+        if (input.Size() != w.NumColumns())
+        {
+            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "MatrixVectorProductNode: input size must match the number of columns in the 'w' matrix");
+        }
+    }
+
+    template <typename ValueType, math::MatrixLayout layout>
+    void MatrixVectorProductNode<ValueType, layout>::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        Node::WriteToArchive(archiver);
+
+        archiver["w_rows"] << _w.NumRows();
+        archiver["w_columns"] << _w.NumColumns();
+        std::vector<ValueType> temp;
+        temp.assign(_w.GetConstDataPointer(), _w.GetConstDataPointer() + (size_t)(_w.NumRows() * _w.NumColumns()));
+        archiver["w"] << temp;
+
+        archiver[defaultInputPortName] << _input;
+        archiver[defaultOutputPortName] << _output;
+    }
+
+    template <typename ValueType, math::MatrixLayout layout>
+    void MatrixVectorProductNode<ValueType, layout>::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        Node::ReadFromArchive(archiver);
+
+        size_t w_rows = 0;
+        size_t w_columns = 0;
+        archiver["w_rows"] >> w_rows;
+        archiver["w_columns"] >> w_columns;
+        std::vector<ValueType> temp;
+        archiver["w"] >> temp;
+        _w = math::Matrix<ValueType, layout>(w_rows, w_columns, temp);
+
+        archiver[defaultInputPortName] >> _input;
+        archiver[defaultOutputPortName] >> _output;
+    }
+
+    template <typename ValueType, math::MatrixLayout layout>
+    void MatrixVectorProductNode<ValueType, layout>::Copy(model::ModelTransformer& transformer) const
+    {
+        const auto& newPortElements = transformer.GetCorrespondingInputs(_input);
+        auto newNode = transformer.AddNode<MatrixVectorProductNode<ValueType, layout>>(newPortElements, _w);
+        transformer.MapNodeOutput(output, newNode->output);
+    }
+
+    template <typename ValueType, math::MatrixLayout layout>
+    bool MatrixVectorProductNode<ValueType, layout>::Refine(model::ModelTransformer& transformer) const
+    {
+        const auto& newInput = transformer.GetCorrespondingInputs(_input);
+
+        // Make sure we have a RowMatrix (because that's what MatrixVectorMultiplyNode wants)
+        math::RowMatrix<ValueType> projectionMatrix(_w);
+        auto projectionMatrixValue = projectionMatrix.ToArray();
+        auto projectionMatrixNode = transformer.AddNode<ConstantNode<ValueType>>(projectionMatrixValue);
+        auto m = projectionMatrix.NumRows();
+        auto n = projectionMatrix.NumColumns();
+        auto matrixStride = projectionMatrix.GetIncrement();
+        if (matrixStride == 0 || matrixStride < m)
+        {
+            throw utilities::InputException(utilities::InputExceptionErrors::badData, "Matrix has an invalid stride");
+        }
+        auto matrixMultiplyNode = transformer.AddNode<MatrixVectorMultiplyNode<ValueType>>(projectionMatrixNode->output, m, n, matrixStride, newInput);
+        transformer.MapNodeOutput(output, matrixMultiplyNode->output);
+        return true;
+    }
+
+    template <typename ValueType, math::MatrixLayout layout>
+    void MatrixVectorProductNode<ValueType, layout>::Compute() const
+    {
+        math::ColumnVector<ValueType> input(_input.Size());
+        for (size_t index = 0; index < _input.Size(); ++index)
+        {
+            input[index] = _input[index];
+        }
+
+        math::ColumnVector<ValueType> result(_w.NumRows());
+
+        // result = _w * data
+        math::MultiplyScaleAddUpdate(static_cast<ValueType>(1), _w, input, static_cast<ValueType>(0), result);
+
+
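MultiplyScaleAddUpdate with scale 1 and bias 0, as called above, reduces to a plain matrix-vector product. A minimal standalone sketch of that GEMV semantics for a row-major matrix (illustrative helper, not ELL's math library):

```cpp
#include <cstddef>
#include <vector>

// Sketch: result = w * x for a rows-by-cols matrix w stored row-major.
template <typename ValueType>
std::vector<ValueType> MatrixVectorProduct(const std::vector<ValueType>& w, std::size_t rows, std::size_t cols, const std::vector<ValueType>& x)
{
    std::vector<ValueType> result(rows, 0);
    for (std::size_t r = 0; r < rows; ++r)
    {
        for (std::size_t c = 0; c < cols; ++c)
        {
            result[r] += w[r * cols + c] * x[c]; // accumulate the r-th row dot x
        }
    }
    return result;
}
```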
_output.SetOutput(result.ToArray()); + } + + template + MatrixVectorProductNode* AddNodeToModelTransformer(const model::PortElements& input, math::ConstMatrixReference w, model::ModelTransformer& transformer) + { + return transformer.AddNode(input, w); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/MovingAverageNode.h b/libraries/nodes/include/MovingAverageNode.h index 5369a3073..0b0704efc 100644 --- a/libraries/nodes/include/MovingAverageNode.h +++ b/libraries/nodes/include/MovingAverageNode.h @@ -83,4 +83,101 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/MovingAverageNode.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + MovingAverageNode::MovingAverageNode() : + Node({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0), + _windowSize(0) + { + } + + template + MovingAverageNode::MovingAverageNode(const model::OutputPort& input, size_t windowSize) : + Node({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, _input.Size()), + _windowSize(windowSize) + { + auto dimension = _input.Size(); + for (size_t index = 0; index < _windowSize; ++index) + { + _samples.push_back(std::vector(dimension)); + } + _runningSum = std::vector(dimension); + } + + template + void MovingAverageNode::Compute() const + { + auto inputSample = _input.GetValue(); + auto lastBufferedSample = _samples[0]; + _samples.push_back(inputSample); + _samples.erase(_samples.begin()); + + std::vector result(_input.Size()); + for (size_t index = 0; index < inputSample.size(); ++index) + { + _runningSum[index] += (inputSample[index] - lastBufferedSample[index]); + result[index] = _runningSum[index] / _windowSize; + } + _output.SetOutput(result); + }; + + template + void MovingAverageNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, _windowSize); + transformer.MapNodeOutput(output, newNode->output); + } + + template + bool MovingAverageNode::Refine(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto delayNode = transformer.AddNode>(newPortElements, _windowSize); + auto subtractNode = transformer.AddNode>(newPortElements, delayNode->output, emitters::BinaryOperationType::subtract); + auto accumNode = transformer.AddNode>(subtractNode->output); + std::vector literalN(newPortElements.Size(), (ValueType)_windowSize); + auto constNode = transformer.AddNode>(literalN); + auto divideNode = transformer.AddNode>(accumNode->output, constNode->output, emitters::BinaryOperationType::coordinatewiseDivide); + transformer.MapNodeOutput(output, divideNode->output); + return true; + } + + template + void MovingAverageNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["windowSize"] << _windowSize; + } + + template + void MovingAverageNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["windowSize"] >> _windowSize; + + auto dimension = _input.Size(); + _samples.clear(); + _samples.reserve(_windowSize); + for (size_t index = 0; index < _windowSize; ++index) + { 
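The MovingAverageNode's Compute and its refinement above both exploit the same O(1)-per-sample identity: the new window sum equals the old sum plus the incoming sample minus the sample that just left the window. A small standalone sketch of that update for a scalar stream (illustrative only, not the node API):

```cpp
#include <cstddef>
#include <deque>

// Sketch: O(1)-per-sample moving average via a running sum over a
// fixed-size window that starts out filled with zeros, as the node does.
class MovingAverage
{
public:
    explicit MovingAverage(std::size_t windowSize) :
        _window(windowSize, 0.0),
        _windowSize(windowSize)
    {
    }

    double Update(double sample)
    {
        _runningSum += sample - _window.front(); // add newest, drop oldest
        _window.pop_front();
        _window.push_back(sample);
        return _runningSum / _windowSize;
    }

private:
    std::deque<double> _window;
    double _runningSum = 0;
    std::size_t _windowSize;
};
```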
+ _samples.push_back(std::vector(dimension)); + } + _runningSum = std::vector(dimension); + _output.SetSize(dimension); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/MovingVarianceNode.h b/libraries/nodes/include/MovingVarianceNode.h index 822813fe5..699dc530b 100644 --- a/libraries/nodes/include/MovingVarianceNode.h +++ b/libraries/nodes/include/MovingVarianceNode.h @@ -76,4 +76,89 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/MovingVarianceNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + MovingVarianceNode::MovingVarianceNode() : + Node({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0), + _windowSize(0) + { + } + + template + MovingVarianceNode::MovingVarianceNode(const model::OutputPort& input, size_t windowSize) : + Node({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, _input.Size()), + _windowSize(windowSize) + { + auto dimension = _input.Size(); + for (size_t index = 0; index < _windowSize; ++index) + { + _samples.push_back(std::vector(dimension)); + } + _runningSum = std::vector(dimension); + _runningSquaredSum = std::vector(dimension); + } + + template + void MovingVarianceNode::Compute() const + { + static auto squared = [](const ValueType& x) { return x * x; }; + + auto inputSample = _input.GetValue(); + auto lastBufferedSample = _samples[0]; + _samples.push_back(inputSample); + _samples.erase(_samples.begin()); + + std::vector result(_input.Size()); + for (size_t index = 0; index < inputSample.size(); ++index) + { + _runningSum[index] += (inputSample[index] - lastBufferedSample[index]); + _runningSquaredSum[index] += squared(inputSample[index]) - squared(lastBufferedSample[index]); + result[index] = (_runningSquaredSum[index] - (squared(_runningSum[index]) / _windowSize)) / _windowSize; + } + _output.SetOutput(result); + }; + + template + void MovingVarianceNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, _windowSize); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void MovingVarianceNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["windowSize"] << _windowSize; + } + + template + void MovingVarianceNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["windowSize"] >> _windowSize; + + auto dimension = _input.Size(); + _samples.clear(); + _samples.reserve(_windowSize); + std::generate_n(std::back_inserter(_samples), _windowSize, [dimension] { return std::vector(dimension); }); + _runningSum = std::vector(dimension); + _runningSquaredSum = std::vector(dimension); + _output.SetSize(dimension); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/MultiplexerNode.h b/libraries/nodes/include/MultiplexerNode.h index 360011079..424511f0d 100644 --- a/libraries/nodes/include/MultiplexerNode.h +++ b/libraries/nodes/include/MultiplexerNode.h @@ -82,4 +82,138 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/MultiplexerNode.tcc" +#pragma region implementation + +namespace 
ell +{ +namespace nodes +{ + template + MultiplexerNode::MultiplexerNode() : + CompilableNode({ &_elements, &_selector }, { &_output }), + _elements(this, {}, elementsPortName), + _selector(this, {}, selectorPortName), + _output(this, defaultOutputPortName, 1) + { + } + + template + MultiplexerNode::MultiplexerNode(const model::OutputPort& input, const model::OutputPort& selector) : + CompilableNode({ &_elements, &_selector }, { &_output }), + _elements(this, input, elementsPortName), + _selector(this, selector, selectorPortName), + _output(this, defaultOutputPortName, 1) + { + if (selector.Size() != 1) + { + throw ell::utilities::Exception("Error: Condition must be 1-D signal"); + } + }; + + template + void MultiplexerNode::Compute() const + { + int index = static_cast(_selector[0]); + _output.SetOutput({ _elements[index] }); + } + + template + void MultiplexerNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newElements = transformer.GetCorrespondingInputs(_elements); + const auto& newSelector = transformer.GetCorrespondingInputs(_selector); + auto newNode = transformer.AddNode>(newElements, newSelector); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void MultiplexerNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + if (std::is_same()) + { + CompileMultiplexerBinary(compiler, function); + } + else if (std::is_same()) + { + CompileUnrolled(compiler, function); + } + else + { + throw emitters::EmitterException(emitters::EmitterError::valueTypeNotSupported, "Multiplexer node selectors must be bool or int"); + } + } + + template + void MultiplexerNode::CompileMultiplexerBinary(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + VerifyIsScalar(selector); + VerifyIsScalar(output); + + emitters::LLVMValue pSelector = compiler.EnsurePortEmitted(selector); + emitters::LLVMValue pSelectorVal = function.Load(pSelector); + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + auto lVal = elements.GetInputElement(0); // lval is selected if the result of the "if" comparison is NON-zero + auto rVal = elements.GetInputElement(1); + auto pLMergeableSrc = compiler.GetMergeableNodeRegion(lVal); + auto pRMergeableSrc = compiler.GetMergeableNodeRegion(rVal); + + function.If(emitters::TypedComparison::equals, pSelectorVal, function.Literal(0), [pLMergeableSrc, pResult, &compiler, this](emitters::IRFunctionEmitter& function) { + if (pLMergeableSrc != nullptr) + { + function.MergeRegion(pLMergeableSrc); + } + function.Store(pResult, compiler.LoadPortElementVariable(elements.GetInputElement(0))); + }) + .Else([pRMergeableSrc, pResult, &compiler, this](emitters::IRFunctionEmitter& function) { + if (pRMergeableSrc != nullptr) + { + function.MergeRegion(pRMergeableSrc); + } + function.Store(pResult, compiler.LoadPortElementVariable(elements.GetInputElement(1))); + }); + + auto pSelectorNode = selector.GetParentNodes()[0]; + if (HasSingleDescendant(*pSelectorNode)) + { + compiler.TryMergeNodeRegions(*pSelectorNode, *this); + } + } + + template + void MultiplexerNode::CompileUnrolled(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + VerifyIsScalar(selector); + VerifyIsScalar(output); + auto numElements = elements.Size(); + + emitters::LLVMValue pSelector = compiler.EnsurePortEmitted(selector); + auto pSelectorVal = function.Load(pSelector); + emitters::LLVMValue result = compiler.EnsurePortEmitted(output); + for (size_t index = 0; index < numElements; ++index) + { 
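CompileUnrolled, in progress here, lowers the runtime index lookup into one independent compare-and-store per element, mirroring the emitted IR rather than a single indexed load. Its effect, sketched in plain C++ (illustrative names):

```cpp
#include <cstddef>
#include <vector>

// Sketch of the unrolled select: each iteration is an independent
// "if (selector == index) result = elements[index]" compare-and-store.
template <typename ValueType>
ValueType UnrolledSelect(const std::vector<ValueType>& elements, int selector)
{
    ValueType result{}; // stays default-valued if selector is out of range
    for (std::size_t index = 0; index < elements.size(); ++index)
    {
        if (selector == static_cast<int>(index))
        {
            result = elements[index];
        }
    }
    return result;
}
```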
+ function.If(emitters::TypedComparison::equals, function.Literal((int)index), pSelectorVal, [index, result, &compiler, this](emitters::IRFunctionEmitter& function) { + emitters::LLVMValue val = compiler.LoadPortElementVariable(elements.GetInputElement(index)); + function.Store(result, val); + }); + } + } + + template + void MultiplexerNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver["elements"] << _elements; + archiver["selector"] << _selector; + } + + template + void MultiplexerNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver["elements"] >> _elements; + archiver["selector"] >> _selector; + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/NeuralNetworkLayerNode.h b/libraries/nodes/include/NeuralNetworkLayerNode.h index ba87f9b16..c587967b9 100644 --- a/libraries/nodes/include/NeuralNetworkLayerNode.h +++ b/libraries/nodes/include/NeuralNetworkLayerNode.h @@ -157,4 +157,165 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/NeuralNetworkLayerNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + // + // NeuralNetworkLayerNodeBase + // + + template + NeuralNetworkLayerNodeBase::NeuralNetworkLayerNodeBase() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0) + { + _parameters.includePaddingInInputData = true; + } + + template + NeuralNetworkLayerNodeBase::NeuralNetworkLayerNodeBase(const model::OutputPort& input, const NeuralNetworkLayerNodeParameters& parameters, size_t outputSize) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, outputSize), + _parameters(parameters) + { + } + + template + void NeuralNetworkLayerNodeBase::WriteToArchive(utilities::Archiver& archiver) const + { + CompilableNode::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + } + + template + void NeuralNetworkLayerNodeBase::ReadFromArchive(utilities::Unarchiver& archiver) + { + CompilableNode::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + } + + // + // NeuralNetworkLayerNode + // + template + NeuralNetworkLayerNode::NeuralNetworkLayerNode() : + NeuralNetworkLayerNodeBase(), + _inputShape(0, 0, 0) + { + } + + template + NeuralNetworkLayerNode::NeuralNetworkLayerNode(const model::OutputPort& input, const LayerType& layer) : + NeuralNetworkLayerNodeBase(input, {}, layer.GetOutput().Size()), + _inputTensor(layer.GetInputShape()), + _layer(layer), + _inputShape(layer.GetInputShape()) + { + _layer.GetLayerParameters().input = _inputTensor; + + const auto& layerParameters = _layer.GetLayerParameters(); + + // Calculate input dimension parameters + size_t inputPaddingSize = layerParameters.inputPaddingParameters.paddingSize; + auto inputShape = this->GetLayer().GetInputShape(); + _inputLayout = CalculateMemoryLayout(inputPaddingSize, inputShape); + + // Calculate output dimension parameters + size_t outputPaddingSize = layerParameters.outputPaddingParameters.paddingSize; + auto outputShape = this->_layer.GetOutputShape(); + _output.SetMemoryLayout(CalculateMemoryLayout(outputPaddingSize, outputShape)); + } + + template + model::PortMemoryLayout NeuralNetworkLayerNode::CalculateMemoryLayout(size_t padding, typename predictors::neural::Layer::Shape dataBufferSize) + { + // Calculate dimension 
parameters + math::IntegerTriplet dataSizeArray = dataBufferSize; + model::MemoryShape stride{ { static_cast(dataSizeArray[0]), static_cast(dataSizeArray[1]), static_cast(dataSizeArray[2]) } }; + model::MemoryShape offset{ static_cast(padding), static_cast(padding), 0 }; + model::MemoryShape size({}); + size.Resize(stride.NumDimensions()); + for (int dimensionIndex = 0; dimensionIndex < offset.NumDimensions(); ++dimensionIndex) + { + if (stride[dimensionIndex] < (2 * offset[dimensionIndex])) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Data size not large enough to accommodate padding"); + } + size[dimensionIndex] = stride[dimensionIndex] - (2 * offset[dimensionIndex]); + } + + return { size, stride, offset }; + } + + template + utilities::ArchiveVersion NeuralNetworkLayerNode::GetArchiveVersion() const + { + constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v5_refined_nodes }; + + return archiveVersion; + } + + template + bool NeuralNetworkLayerNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const + { + constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v5_refined_nodes }; + + return version >= archiveVersion; + } + + template + void NeuralNetworkLayerNode::WriteToArchive(utilities::Archiver& archiver) const + { + NeuralNetworkLayerNodeBase::WriteToArchive(archiver); + archiver["inputLayout"] << _inputLayout; + archiver["outputLayout"] << GetOutputMemoryLayout(); + + std::vector inputShape = _inputShape; + archiver["inputShape"] << inputShape; + + archiver["layer"] << _layer; + } + + template + void NeuralNetworkLayerNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + NeuralNetworkLayerNodeBase::ReadFromArchive(archiver); + archiver["inputLayout"] >> _inputLayout; + model::PortMemoryLayout outputLayout; + archiver["outputLayout"] >> outputLayout; + _output.SetMemoryLayout(outputLayout); + + std::vector inputShape; + archiver["inputShape"] >> inputShape; + _inputShape = math::TensorShape{ inputShape }; + + _inputTensor = typename LayerType::TensorType(_inputShape); + _layer.GetLayerParameters().input = _inputTensor; + archiver["layer"] >> _layer; + } + + template + void NeuralNetworkLayerNode::Compute() const + { + auto inputVector = _input.GetValue(); + auto inputTensor = typename LayerType::ConstTensorReferenceType{ inputVector.data(), _inputTensor.GetShape() }; + _inputTensor.CopyFrom(inputTensor); + _layer.Compute(); + const auto& outputTensor = _layer.GetOutput(); + _output.SetOutput(outputTensor.ToArray()); + } + + template + typename LayerType::LayerParameters GetLayerNodeParameters(const typename LayerType::TensorType& inputTensor, const typename LayerType::LayerParameters& layerParameters) + { + return { inputTensor, layerParameters.inputPaddingParameters, layerParameters.outputShape, layerParameters.outputPaddingParameters }; + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/NeuralNetworkPredictorNode.h b/libraries/nodes/include/NeuralNetworkPredictorNode.h index 1443c3669..eb560a55a 100644 --- a/libraries/nodes/include/NeuralNetworkPredictorNode.h +++ b/libraries/nodes/include/NeuralNetworkPredictorNode.h @@ -131,4 +131,97 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/NeuralNetworkPredictorNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + // + // Helper functions + // + namespace + { + template + 
NeuralNetworkLayerNodeBase* TryAddLayerNode(model::ModelTransformer& transformer, predictors::neural::Layer& layer, const model::OutputPort& layerInputs, const typename NeuralNetworkPredictorNode::NetworkCompileOptions& options, typename NeuralNetworkPredictorNode::NetworkCompileState& state) + { + auto typedLayer = dynamic_cast(&layer); + if (typedLayer != nullptr) + { + return transformer.AddNode(layerInputs, *typedLayer); + } + return nullptr; + } + + template + NeuralNetworkLayerNodeBase* TryAddLayerNodeWithTwoInputs(model::ModelTransformer& transformer, predictors::neural::Layer& layer, const model::OutputPort& layerInputs, const model::OutputPort& secondInput, const typename NeuralNetworkPredictorNode::NetworkCompileOptions& options, typename NeuralNetworkPredictorNode::NetworkCompileState& state) + { + auto typedLayer = dynamic_cast(&layer); + if (typedLayer != nullptr) + { + return transformer.AddNode(layerInputs, secondInput, *typedLayer); + } + return nullptr; + } + } // namespace + + template + NeuralNetworkLayerNodeBase* NeuralNetworkPredictorNode::AddLayerNode(model::ModelTransformer& transformer, predictors::neural::Layer& layer, const model::OutputPort& layerInputs, const NetworkCompileOptions& options, NetworkCompileState& state) const + { + NeuralNetworkLayerNodeBase* node = nullptr; + + if (layer.template IsA>()) + { + auto& activationLayer = layer.template As>(); + auto paf = dynamic_cast*>(activationLayer.GetActivationFunction().GetImpl()); + if (paf) + { + // Ah, then this one is special, we have to use ParametricReLUActivationLayerNode in this case. + return TryAddLayerNode, ParametricReLUActivationLayerNode>(transformer, layer, layerInputs, options, state); + } + } + + node = TryAddLayerNode, ActivationLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, BatchNormalizationLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, BiasLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, BinaryConvolutionalLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, ConvolutionalLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, FullyConnectedLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + // + // Pooling layer + // + + node = TryAddLayerNode, PoolingLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, PoolingLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, RegionDetectionLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, ScalingLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + node = TryAddLayerNode, SoftmaxLayerNode>(transformer, layer, layerInputs, options, state); + if (node != nullptr) return node; + + auto name = layer.GetRuntimeTypeName(); + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Unknown layer type in refine: " + name); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git 
a/libraries/nodes/include/ReceptiveFieldMatrixNode.h b/libraries/nodes/include/ReceptiveFieldMatrixNode.h index c85a7636e..b67fdf75a 100644 --- a/libraries/nodes/include/ReceptiveFieldMatrixNode.h +++ b/libraries/nodes/include/ReceptiveFieldMatrixNode.h @@ -102,4 +102,414 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/ReceptiveFieldMatrixNode.tcc" +#pragma region implementation + +#include +#include +#include + +#include + +#include +#include +#include + +namespace ell +{ +namespace nodes +{ + namespace + { + // + // Functions + // + + // Note: this function is inline to suppress a compiler warning about it being unneeded + inline emitters::LLVMValue GetValueFromVolume(emitters::IRFunctionEmitter& function, + emitters::LLVMValue inputVolume, + const model::PortMemoryLayout& inputLayout, + std::array dataOrder, + emitters::IRLocalScalar valueRow, + emitters::IRLocalScalar valueColumn, + emitters::IRLocalScalar valueChannel) + { + const auto rowStride = inputLayout.GetExtent(0); + const auto columnStride = inputLayout.GetExtent(1); + const auto channelStride = inputLayout.GetExtent(2); + + auto index = function.LocalScalar(); + if (dataOrder == std::array({ 0, 1, 2 })) + { + // row, column, channel order + index = valueRow * (columnStride * channelStride) + (valueColumn * channelStride) + valueChannel; + } + else + { + // channel, row, column order + index = valueChannel * (rowStride * columnStride) + (valueRow * columnStride) + valueColumn; + } + + return function.ValueAt(inputVolume, index); + } + + template + emitters::LLVMValue GetValueFromPaddedVolume(emitters::IRFunctionEmitter& function, + emitters::LLVMValue inputVolume, + const model::PortMemoryLayout& inputLayout, + int convPadding, + std::array dataOrder, + emitters::IRLocalScalar inputRow, + emitters::IRLocalScalar inputColumn, + emitters::IRLocalScalar inputChannel) + { + const int inputHeight = inputLayout.GetActiveSize(0); + const int inputWidth = inputLayout.GetActiveSize(1); + const int inputDepth = inputLayout.GetActiveSize(2); + const int inputPadding = inputLayout.GetOffset(0); // a proxy for the padding + + const int extraPaddingVal = convPadding - inputPadding; // amount by which the convolution's desired padding exceeds input's + auto extraPadding = function.LocalScalar(extraPaddingVal); + if (extraPaddingVal > 0) // known at compile-time + { + auto valueRow = inputRow - extraPadding; + auto valueColumn = inputColumn - extraPadding; + + auto rowBad = (valueRow < 0) || (valueRow >= inputHeight); + auto colBad = (valueColumn < 0) || (valueColumn >= inputWidth); + auto outOfBounds = rowBad || colBad; + + emitters::LLVMValue returnValue = function.Variable(emitters::GetVariableType(), "returnVal"); + function.If(outOfBounds, [=](emitters::IRFunctionEmitter& function) { + function.StoreZero(returnValue); + }) + .Else([=](emitters::IRFunctionEmitter& function) { + // channel, row, col order + auto index1 = valueRow * (inputWidth * inputDepth); + auto index2 = valueColumn * inputDepth; + auto index = index1 + index2 + inputChannel; + auto val = function.ValueAt(inputVolume, index); + + // Note: we can't return from within an if/else block, so we store the value in a local variable + function.Store(returnValue, val); + }); + + return function.Load(returnValue); + } + + if (extraPaddingVal != 0) // negative + { + inputRow = inputRow + extraPadding; + inputColumn = inputColumn + extraPadding; + } + return GetValueFromVolume(function, inputVolume, inputLayout, dataOrder, inputRow, inputColumn, 
inputChannel); + } + + template + void EmitReceptiveFieldToColumns(emitters::IRFunctionEmitter& function, + emitters::LLVMValue inputVolume, + const model::PortMemoryLayout& inputLayout, + int filterWidth, + int stride, + int convPadding, // amount of padding to assume around the image -- determines output size + std::array dataOrder, + int outputWidth, + int outputHeight, + emitters::LLVMValue outputMatrix) + { + // Model parameters + const auto inputHeight = inputLayout.GetLogicalDimensionActiveSize(0); + const auto inputWidth = inputLayout.GetLogicalDimensionActiveSize(1); + const auto inputDepth = inputLayout.GetLogicalDimensionActiveSize(2); + const auto fieldVolumeSize = filterWidth * filterWidth * inputDepth; + const auto numOutputColumns = static_cast(outputWidth * outputHeight); + + // Input (I): d x h x w (planar) + // Output (S): (d * k * k) x (outputHeight * outputWidth) == fieldVolumeSize x outputImageSize + + // Example + // k = 3, d = 2 + // + // A B C D a b c d + // I = E F G H e f g h + // I J K L i j k l + // M N O P m n o p + // + // . . . . . A B C D E F G H I J K + // . . . . . a b c d e f g h i j k + // . . . . A B C D E F G H I J K L + // . . . . a b c d e f g h i j k l + // . . . . B C D E F G H I J K L M + // . . . . b c d e f g h i j k l m + // + // . A B C D E F G H I J K L M N O + // . a b c d e f g h i j k l m n o + // S = A B C D E F G H I J K L M N O P + // a b c d e f g h i j k l m n o p + // B C D E F G H I J K L M N O P . + // b c d e f g h i j k l m n o p . + // + // D E F G H I J K L M N O . . . . + // d e f g h i j k l m n o . . . . + // E F G H I J K L M N O P . . . . + // e f g h i j k l m n o p . . . . + // F G H I J K L M N O P . . . . . + // f g h i j k l m n o p . . . . . + // + // Note that the middle d=2 rows of S are the entire image, linearized: + // A B C D E F G H I J K L M N O P a b c d e f g h i j k l m n o p + + // const int extraPadding = (int)convPadding - (int)inputPadding; // extraPadding is the amount of extra padding we need to do, on top of what's in the input data + const int extraPadding = convPadding; + const bool useContiguousReshape = (dataOrder == std::array({ { 2, 0, 1 } })) && (stride == 1); // channel, row, column order, unit stride + if (useContiguousReshape) + { + // assert(inputPadding == 0 && "Input data must not be padded"); + // Points to the beginning of the input volume + emitters::LLVMValue inputPtr = function.PointerOffset(inputVolume, 0); + + // Points to the beginning of the outputMatrix + emitters::LLVMValue outputPtr = function.PointerOffset(outputMatrix, 0); + + // Unroll outer loops + for (int fy = 0; fy < filterWidth; ++fy) + { + for (int fx = 0; fx < filterWidth; ++fx) + { + // `outputRow` is the row of the output matrix to start writing to. 
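The worked example above illustrates the receptive-field-to-columns (im2col) layout: output row (fy·k + fx)·depth + channel holds, for every output pixel, the input value that filter tap (fy, fx, channel) would read. For orientation, a compact standalone implementation of the same transform for the row/column/channel ordering, stride 1, and size-preserving zero padding (a hypothetical helper, not the emitter code):

```cpp
#include <cstddef>
#include <vector>

// Sketch: im2col for a k x k filter over an H x W x D image stored row-major
// in row/column/channel order, stride 1, symmetric zero padding `pad`
// (pad = (k - 1) / 2 keeps the output the same spatial size as the input).
// Output is (D*k*k) rows by (H*W) columns, flattened row-major.
inline std::vector<double> Im2Col(const std::vector<double>& image, int H, int W, int D, int k, int pad)
{
    std::vector<double> out(static_cast<std::size_t>(D) * k * k * H * W, 0.0);
    int col = 0;
    for (int y = 0; y < H; ++y)
    {
        for (int x = 0; x < W; ++x, ++col) // one output column per pixel
        {
            int row = 0;
            for (int fy = 0; fy < k; ++fy)
            {
                for (int fx = 0; fx < k; ++fx)
                {
                    for (int d = 0; d < D; ++d, ++row) // row = (fy*k + fx)*D + d
                    {
                        int sy = y + fy - pad;
                        int sx = x + fx - pad;
                        bool inBounds = sy >= 0 && sy < H && sx >= 0 && sx < W;
                        out[static_cast<std::size_t>(row) * H * W + col] =
                            inBounds ? image[(static_cast<std::size_t>(sy) * W + sx) * D + d] : 0.0;
                    }
                }
            }
        }
    }
    return out;
}
```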
Multiplied by `inputDepth`, because + // we're going to memcpy `inputDepth` rows at once + int outputRow = (fy * filterWidth + fx) * inputDepth; + + int outputOffset1 = inputWidth * (extraPadding - fy); // where to start writing this row in the output + int outputOffset2 = (extraPadding - fx); // where to start writing this row in the output + int inputOffset = 0; // where to start reading from for this row + if (outputOffset1 < 0) + { + inputOffset -= outputOffset1; + outputOffset1 = 0; + } + if (outputOffset2 < 0) + { + inputOffset -= outputOffset2; + outputOffset2 = 0; + } + int outputOffset = outputOffset1 + outputOffset2; + int count = (inputWidth * inputHeight * inputDepth) - inputOffset - outputOffset; + outputOffset += outputRow * numOutputColumns; + + // For this output row, copy what we need from the input image + function.MemoryCopy(inputPtr, inputOffset, outputPtr, outputOffset, count); + const int outputRowOffset = outputRow * numOutputColumns; + + // Zero out the padding areas + // BUG: explicit capture-by-ref entries are here to work around a GCC bug + function.For(inputDepth, [=, &fx, &fy, &extraPadding, &inputWidth, &inputHeight, &outputWidth, &numOutputColumns](emitters::IRFunctionEmitter& function, emitters::LLVMValue channelValue) { + auto channel = function.LocalScalar(channelValue); + auto outputDepthOffset = channel * numOutputColumns; + + // Points to the beginning of the current channel in the outputMatrix + auto outputChannelPtr = function.PointerOffset(outputMatrix, outputDepthOffset); + + uint8_t paddingValue = 0; + if (fy < extraPadding) + { + // zero out full image rows at beginning of image + int count = (extraPadding - fy) * outputWidth; + int begin = 0; + function.MemorySet(outputChannelPtr, outputRowOffset + begin, function.Literal(paddingValue), count); + } + else if (fy > extraPadding) + { + // zero out full image rows at end of image + int count = (fy - extraPadding) * outputWidth; + int begin = numOutputColumns - count; + assert(begin >= 0); + function.MemorySet(outputChannelPtr, outputRowOffset + begin, function.Literal(paddingValue), count); + } + + if (fx < extraPadding) + { + // zero out elements at beginning of each row + int count = extraPadding - fx; + // BUG: explicit capture-by-ref entries are here to work around a GCC bug + function.For(inputHeight, [=, &inputWidth, &outputRowOffset](emitters::IRFunctionEmitter& function, emitters::LLVMValue indexValue) { + auto index = function.LocalScalar(indexValue); + auto begin = index * inputWidth; + auto offset = begin + outputRowOffset; + function.MemorySet(outputChannelPtr, offset, function.Literal(paddingValue), count); + }); + } + else if (fx > extraPadding) + { + // zero out elements at end of each row + int count = fx - extraPadding; + // BUG: explicit capture-by-ref entries are here to work around a GCC bug + function.For(inputHeight, [=, &inputWidth, &outputRowOffset](emitters::IRFunctionEmitter& function, emitters::LLVMValue indexValue) { + auto index = function.LocalScalar(indexValue); + auto begin = ((index + 1) * inputWidth) - count; + auto offset = begin + outputRowOffset; + function.MemorySet(outputChannelPtr, offset, function.Literal(paddingValue), count); + }); + } + }); + } + } + } + else // Normal, single value-at-a-time method + { + // The outer loop iterates over all d * k * k entries in the receptive field + function.For(fieldVolumeSize, [=](emitters::IRFunctionEmitter& function, emitters::LLVMValue fValue) { + auto f = function.LocalScalar(fValue); + auto fieldChannel = 
function.LocalScalar(); + auto fieldColumn = function.LocalScalar(); + auto fieldRow = function.LocalScalar(); + + // TODO: use the entries of dataOrder to compute the indices + if (dataOrder == std::array({ { 0, 1, 2 } })) // row, column, channel order + { + fieldChannel = f % inputDepth; + auto fDivDepth = f / inputDepth; + fieldColumn = fDivDepth % filterWidth; + fieldRow = fDivDepth / filterWidth; + } + else // channel, row, column order + { + fieldColumn = f % filterWidth; + auto fDivColumns = f / filterWidth; + fieldRow = fDivColumns % filterWidth; + fieldChannel = fDivColumns / filterWidth; + } + + // Now for each receptive field entry, iterate over all h * w locations in the output image + function.For(outputHeight, [=, &fieldRow, &fieldColumn](emitters::IRFunctionEmitter& function, emitters::LLVMValue outputImageRowValue) { + auto outputImageRow = function.LocalScalar(outputImageRowValue); + auto inputRow = outputImageRow * stride; + function.For(outputWidth, [=, &fieldRow, &fieldColumn, &inputRow](emitters::IRFunctionEmitter& function, emitters::LLVMValue outputImageColumnValue) { + auto outputImageColumn = function.LocalScalar(outputImageColumnValue); + auto inputColumn = outputImageColumn * stride; + + // outRowOffset is the offset to the f'th row in the output S matrix + auto outRowOffset = f * (outputHeight * outputWidth); + + // outColRowOffset is the offset to the column of the S matrix where `outputImageRow` begins + auto outColRowOffset = outputImageRow * outputWidth; + // outputIndex is the index of the entry in S to write to + auto outputIndex = outRowOffset + (outColRowOffset + outputImageColumn); + + // input row and column in the input image + auto entryRow = inputRow + fieldRow; + auto entryColumn = inputColumn + fieldColumn; + auto volumeValue = GetValueFromPaddedVolume(function, inputVolume, inputLayout, extraPadding, dataOrder, entryRow, entryColumn, fieldChannel); + function.SetValueAt(outputMatrix, outputIndex, volumeValue); + }); + }); + }); + } + } + } // namespace + + // + // ReceptiveFieldMatrixNode + // + template + ReceptiveFieldMatrixNode::ReceptiveFieldMatrixNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0), + _filterWidth(0), + _stride(0), + _convolutionPadding(0), + _dataOrder({ { 0, 1, 2 } }), + _outputWidth(0), + _outputHeight(0) + { + } + + template + ReceptiveFieldMatrixNode::ReceptiveFieldMatrixNode(const model::OutputPort& input, const model::PortMemoryLayout& inputMemoryLayout, int filterWidth, int stride, int convolutionPadding, std::array dataOrder, int outputWidth, int outputHeight) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, model::PortMemoryLayout(model::MemoryShape{ outputWidth * outputHeight, filterWidth * filterWidth * inputMemoryLayout.GetLogicalDimensionActiveSize(2) }, model::DimensionOrder{ dataOrder })), + _inputMemoryLayout(inputMemoryLayout), + _filterWidth(filterWidth), + _stride(stride), + _convolutionPadding(convolutionPadding), + _dataOrder(dataOrder), + _outputWidth(outputWidth), + _outputHeight(outputHeight) + { + if (inputMemoryLayout.NumDimensions() != 3) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "ReceptiveFieldMatrixNode: inputMemoryLayout must have 3 dimensions"); + } + } + + template + void ReceptiveFieldMatrixNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = 
transformer.GetCorrespondingInputs(_input);
+        auto newNode = transformer.AddNode<ReceptiveFieldMatrixNode<ValueType>>(newPortElements, GetInputMemoryLayout(), _filterWidth, _stride, _convolutionPadding, _dataOrder, _outputWidth, _outputHeight);
+        transformer.MapNodeOutput(this->output, newNode->output);
+    }
+
+    template <typename ValueType>
+    void ReceptiveFieldMatrixNode<ValueType>::Compute() const
+    {
+        throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented);
+    }
+
+    template <typename ValueType>
+    void ReceptiveFieldMatrixNode<ValueType>::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
+    {
+        emitters::LLVMValue pInput = compiler.EnsurePortEmitted(this->input);
+        emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(this->output);
+
+        const auto& inputLayout = this->GetInputMemoryLayout();
+        assert(inputLayout.NumDimensions() == 3);
+
+        // Re-shape input
+        EmitReceptiveFieldToColumns(function, pInput, inputLayout, _filterWidth, _stride, _convolutionPadding, _dataOrder, _outputWidth, _outputHeight, pOutput);
+    }
+
+    template <typename ValueType>
+    void ReceptiveFieldMatrixNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        Node::WriteToArchive(archiver);
+        archiver[defaultInputPortName] << _input;
+        archiver[defaultOutputPortName] << _output;
+        archiver["inputLayout"] << _inputMemoryLayout;
+
+        archiver["filterWidth"] << _filterWidth;
+        archiver["stride"] << _stride;
+        archiver["convolutionPadding"] << _convolutionPadding;
+
+        std::vector<int> dataOrder(_dataOrder.begin(), _dataOrder.end());
+        archiver["dataOrder"] << dataOrder;
+
+        archiver["outputWidth"] << _outputWidth;
+        archiver["outputHeight"] << _outputHeight;
+    }
+
+    template <typename ValueType>
+    void ReceptiveFieldMatrixNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        Node::ReadFromArchive(archiver);
+        archiver[defaultInputPortName] >> _input;
+        archiver[defaultOutputPortName] >> _output;
+        archiver["inputLayout"] >> _inputMemoryLayout;
+
+        archiver["filterWidth"] >> _filterWidth;
+        archiver["stride"] >> _stride;
+        archiver["convolutionPadding"] >> _convolutionPadding;
+
+        std::vector<int> dataOrder;
+        archiver["dataOrder"] >> dataOrder;
+        std::copy(dataOrder.begin(), dataOrder.end(), _dataOrder.begin());
+
+        archiver["outputWidth"] >> _outputWidth;
+        archiver["outputHeight"] >> _outputHeight;
+    }
+} // namespace nodes
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/nodes/include/ReorderDataNode.h b/libraries/nodes/include/ReorderDataNode.h
index 23004aae4..f963eb5d1 100644
--- a/libraries/nodes/include/ReorderDataNode.h
+++ b/libraries/nodes/include/ReorderDataNode.h
@@ -146,4 +146,376 @@ namespace nodes
 } // namespace nodes
 } // namespace ell
 
-#include "../tcc/ReorderDataNode.tcc"
+#pragma region implementation
+
+#include
+#include
+#include
+
+#include
+
+namespace ell
+{
+namespace nodes
+{
+    namespace ReorderDataNodeDetail
+    {
+        using emitters::IRLocalScalar;
+        using model::DimensionOrder;
+        using model::MemoryCoordinates;
+
+        inline MemoryCoordinates LogicalToPhysical(const MemoryCoordinates& coordinates, const DimensionOrder& order)
+        {
+            const int numDimensions = coordinates.NumDimensions();
+            std::vector<int> result(numDimensions);
+            for (int index = 0; index < numDimensions; ++index)
+            {
+                result[index] = coordinates[order[index]];
+            }
+            return { result };
+        }
+
+        inline std::vector<IRLocalScalar> LogicalToPhysical(const std::vector<IRLocalScalar>& coordinates,
+                                                            const DimensionOrder& order)
+        {
+            const int numDimensions = order.NumDimensions();
+            // copying coordinates[0] just because IRLocalScalar doesn't have a default c'tor
+            std::vector<IRLocalScalar> result(numDimensions, coordinates[0]);
+            for 
(int index = 0; index < numDimensions; ++index) + { + result[index] = coordinates[order[index]]; + } + return result; + } + + inline MemoryCoordinates PhysicalToLogical(const MemoryCoordinates& coordinates, const DimensionOrder& order) + { + const int numDimensions = coordinates.NumDimensions(); + std::vector result(numDimensions); + for (int index = 0; index < numDimensions; ++index) + { + result[order[index]] = coordinates[index]; + } + return { result }; + } + + inline std::vector PhysicalToLogical(const std::vector& coordinates, + const DimensionOrder& order) + { + const int numDimensions = order.NumDimensions(); + // copying coordinates[0] just because IRLocalScalar doesn't have a default c'tor + std::vector result(numDimensions, coordinates[0]); + for (int index = 0; index < numDimensions; ++index) + { + result[order[index]] = coordinates[index]; + } + return result; + } + } // namespace ReorderDataNodeDetail + + // + // ReorderDataNode + // + template + ReorderDataNode::ReorderDataNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0) + {} + + // + // Without reordering ("reshape" / slicing) + // + template + ReorderDataNode::ReorderDataNode(const model::OutputPort& input, + const model::PortMemoryLayout& outputMemoryLayout, + ValueType paddingValue) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, outputMemoryLayout), + _paddingValue(paddingValue) + { + _inputMemoryLayout = _input.GetMemoryLayout(); + if (_inputMemoryLayout.NumDimensions() != outputMemoryLayout.NumDimensions()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + "Error: input and output layouts must have same dimension"); + } + } + + template + ReorderDataNode::ReorderDataNode(const model::OutputPort& input, + const model::PortMemoryLayout& inputMemoryLayout, + const model::PortMemoryLayout& outputMemoryLayout, + ValueType paddingValue) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, outputMemoryLayout), + _inputMemoryLayout(inputMemoryLayout), + _paddingValue(paddingValue) + { + if (inputMemoryLayout.NumDimensions() != outputMemoryLayout.NumDimensions()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + "Error: input and output layouts must have same dimension"); + } + } + + // + // With reordering ("reshape" / slicing, followed by transpose / dimension reordering) + // + template + ReorderDataNode::ReorderDataNode(const model::OutputPort& input, + const model::DimensionOrder& order) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, _input.GetMemoryLayout().ReorderedCopy(order)) + { + _inputMemoryLayout = _input.GetMemoryLayout(); + if (_inputMemoryLayout.NumDimensions() != order.NumDimensions()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + "Error: input and output layouts must have same dimension"); + } + } + + template + ReorderDataNode::ReorderDataNode(const model::OutputPort& input, + const model::PortMemoryLayout& outputMemoryLayout, + const model::DimensionOrder& order, + ValueType paddingValue) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, outputMemoryLayout.ReorderedCopy(order)), 
+ _paddingValue(paddingValue) + { + _inputMemoryLayout = _input.GetMemoryLayout(); + if (_inputMemoryLayout.NumDimensions() != outputMemoryLayout.NumDimensions()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + "Error: input and output layouts must have same dimension"); + } + } + + template + ReorderDataNode::ReorderDataNode(const model::OutputPort& input, + const model::PortMemoryLayout& inputMemoryLayout, + const model::PortMemoryLayout& outputMemoryLayout, + const model::DimensionOrder& order, + ValueType paddingValue) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, outputMemoryLayout.ReorderedCopy(order)), + _inputMemoryLayout(inputMemoryLayout), + _paddingValue(paddingValue) + { + if (inputMemoryLayout.NumDimensions() != outputMemoryLayout.NumDimensions()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + "Error: input and output layouts must have same dimension"); + } + } + + template + model::MemoryCoordinates ReorderDataNode::ReorderOutputToInputLocation( + model::MemoryCoordinates physicalOutputCoordinates) const + { + const auto inputDimensionOrder = GetInputMemoryLayout().GetLogicalDimensionOrder(); + const auto outputDimensionOrder = GetOutputMemoryLayout().GetLogicalDimensionOrder(); + + auto logicalCoordinates = + ReorderDataNodeDetail::PhysicalToLogical(physicalOutputCoordinates, outputDimensionOrder); + auto physicalInputCoordinates = + ReorderDataNodeDetail::LogicalToPhysical(logicalCoordinates, inputDimensionOrder); + return physicalInputCoordinates; + } + + // TODO: for each dimension, loop over minimum of input and output interval. Then we don't have to check if the value is out-of-bounds + template + std::vector ReorderDataNode::ReorderOutputToInputLocation( + std::vector physicalOutputCoordinates) const + { + const auto inputDimensionOrder = GetInputMemoryLayout().GetLogicalDimensionOrder(); + const auto outputDimensionOrder = GetOutputMemoryLayout().GetLogicalDimensionOrder(); + + auto logicalCoordinates = + ReorderDataNodeDetail::PhysicalToLogical(physicalOutputCoordinates, outputDimensionOrder); + auto physicalInputCoordinates = + ReorderDataNodeDetail::LogicalToPhysical(logicalCoordinates, inputDimensionOrder); + return physicalInputCoordinates; + } + + template + void ReorderDataNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode(newPortElements, + _inputMemoryLayout, + _output.GetMemoryLayout(), + _paddingValue); + transformer.MapNodeOutput(this->output, newNode->output); + } + + template + void ReorderDataNode::ComputeDimensionLoop(const model::PortMemoryLayout& inputMemoryLayout, + const model::PortMemoryLayout& outputMemoryLayout, + int dimension, + std::vector& coordinates, + std::vector& output) const + { + if (dimension == inputMemoryLayout.NumDimensions() - 1) // last dimension + { + for (int index = 0; index < outputMemoryLayout.GetActiveSize(dimension); ++index) + { + coordinates[dimension] = index; + + auto inputLocation = ReorderOutputToInputLocation(coordinates); + auto inputIndex = inputMemoryLayout.GetEntryOffset(inputLocation); + auto outputIndex = outputMemoryLayout.GetEntryOffset(coordinates); + output[outputIndex] = _input[inputIndex]; + } + } + else + { + for (int index = 0; index < outputMemoryLayout.GetActiveSize(dimension); ++index) + { + 
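+                // Fix this dimension's coordinate, then recurse one dimension
+                // deeper; only the innermost level (handled in the branch above)
+                // actually copies values. For a 3-D layout this visits every
+                // (d0, d1, d2) coordinate in output order.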
coordinates[dimension] = index; + ComputeDimensionLoop(inputMemoryLayout, outputMemoryLayout, dimension + 1, coordinates, output); + } + } + } + + // TODO: for each dimension, loop over minimum of input and output interval. Then we don't have to check if the value is out-of-bounds + template + void ReorderDataNode::Compute() const + { + const auto inputMemoryLayout = GetInputMemoryLayout(); + const auto outputMemoryLayout = _output.GetMemoryLayout(); + if (outputMemoryLayout == inputMemoryLayout) + { + _output.SetOutput(_input.GetValue()); + } + else + { + const int numDimensions = inputMemoryLayout.NumDimensions(); + const int outputSize = outputMemoryLayout.GetMemorySize(); + if (numDimensions != outputMemoryLayout.NumDimensions()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + "Error: input and output layouts must have same dimension"); + } + + std::vector output(outputSize, _paddingValue); // initialize to padding value + std::vector coordinates(numDimensions); + ComputeDimensionLoop(inputMemoryLayout, outputMemoryLayout, 0, coordinates, output); + _output.SetOutput(output); + } + } + + template + void ReorderDataNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + assert(this->input.Size() > 1); + auto input = function.LocalArray(compiler.EnsurePortEmitted(this->input)); + auto output = function.LocalArray(compiler.EnsurePortEmitted(this->output, _paddingValue)); + + const auto inputMemoryLayout = GetInputMemoryLayout(); + const auto outputMemoryLayout = GetOutputMemoryLayout(); + + const int numDimensions = inputMemoryLayout.NumDimensions(); + const int outputSize = outputMemoryLayout.GetMemorySize(); + UNUSED(outputSize); + + std::vector ranges; + for (int dimensionIndex = 0; dimensionIndex < numDimensions; ++dimensionIndex) + { + ranges.push_back({ 0, outputMemoryLayout.GetActiveSize(dimensionIndex) }); + } + + function.For(ranges, + [input, + output, + inputMemoryLayout, + outputMemoryLayout, + this](emitters::IRFunctionEmitter& function, std::vector indices) { + auto inputLocation = ReorderOutputToInputLocation(indices); + auto inputIndex = model::EmitGetEntryOffset(function, inputLocation, inputMemoryLayout); + auto outputIndex = model::EmitGetEntryOffset(function, indices, outputMemoryLayout); + output[outputIndex] = input[inputIndex]; + }); + } + + template + ell::utilities::ArchiveVersion ReorderDataNode::GetArchiveVersion() const + { + constexpr utilities::ArchiveVersion currentArchiveVersion = { + utilities::ArchiveVersionNumbers::v8_port_memory_layout + }; + return std::max(currentArchiveVersion, CompilableNode::GetArchiveVersion()); + } + + template + bool ReorderDataNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const + { + return CompilableNode::CanReadArchiveVersion(version); + } + + template + void ReorderDataNode::WriteToArchive(utilities::Archiver& archiver) const + { + CompilableNode::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["inputLayout"] << _inputMemoryLayout; + archiver["outputLayout"] << GetOutputMemoryLayout(); + archiver["paddingValue"] << _paddingValue; + } + + template + void ReorderDataNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + CompilableNode::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver["inputLayout"] >> _inputMemoryLayout; + model::PortMemoryLayout outputMemoryLayout; + if (archiver.HasNextPropertyName("outputLayout")) + { + // backward-compatability + 
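+            // Older archives stored an explicit "outputLayout" property, optionally
+            // followed by a separate "order" property holding the dimension
+            // permutation; newer archives carry the order inside the layout itself.
+            // The branches here rebuild the output layout for both generations.
+            // (An "order" value such as [2, 0, 1] is illustrative, not required.)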
archiver["outputLayout"] >> outputMemoryLayout; + + if (archiver.HasNextPropertyName("order")) + { + std::vector order; + archiver["order"] >> order; + outputMemoryLayout = model::PortMemoryLayout(outputMemoryLayout.GetActiveSize(), + outputMemoryLayout.GetExtent(), + outputMemoryLayout.GetOffset(), + outputMemoryLayout.GetCumulativeIncrement(), + order); + } + _output.SetMemoryLayout(outputMemoryLayout); + } + else + { + _output.SetMemoryLayout(_inputMemoryLayout); + if (archiver.HasNextPropertyName("order")) + { + std::vector order; + archiver["order"] >> order; + _output.SetMemoryLayout(GetOutputMemoryLayout().ReorderedCopy(order)); + } + } + + if (archiver.HasNextPropertyName("order")) + { + std::vector order; + archiver["order"] >> order; + _output.SetMemoryLayout(GetOutputMemoryLayout().ReorderedCopy(order)); + } + + archiver["paddingValue"] >> _paddingValue; + } + +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/SinkNode.h b/libraries/nodes/include/SinkNode.h index 49e4292d0..4f247e7d4 100644 --- a/libraries/nodes/include/SinkNode.h +++ b/libraries/nodes/include/SinkNode.h @@ -113,4 +113,182 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/SinkNode.tcc" \ No newline at end of file +#pragma region implementation + +#include +#include + +namespace ell +{ +namespace nodes +{ + template + SinkNode::SinkNode() : + SinkNode({}, {}, model::MemoryShape{ 0 }, "", nullptr) + { + } + + // Following the pattern of OutputNode, we provide a constructor override that infers the shape from the input + template + SinkNode::SinkNode(const model::OutputPort& input, const model::OutputPort& trigger, const std::string& sinkFunctionName, SinkFunction sink) : + SinkNode(input, trigger, model::MemoryShape{ static_cast(input.Size()) }, sinkFunctionName, sink) + { + } + + template + SinkNode::SinkNode(const model::OutputPort& input, const model::OutputPort& trigger, size_t outputVectorSize, const std::string& sinkFunctionName, SinkFunction sink) : + SinkNode(input, trigger, model::MemoryShape{ static_cast(outputVectorSize) }, sinkFunctionName, sink) + { + } + + template + SinkNode::SinkNode(const model::OutputPort& input, const model::OutputPort& trigger, const model::MemoryShape& shape, const std::string& sinkFunctionName, SinkFunction sink) : + model::SinkNodeBase(_input, _trigger, _output, shape, sinkFunctionName), + _input(this, input, defaultInputPortName), + _trigger(this, trigger, triggerPortName), + _output(this, defaultOutputPortName, shape), + _sink(sink == nullptr ? 
[](const auto&) {} : sink) + { + } + + template + void SinkNode::Compute() const + { + DEBUG_THROW(_sink == nullptr, utilities::InputException(utilities::InputExceptionErrors::nullReference, "Sink function is not set")); + + if (_sink != nullptr && _trigger.GetValue(0)) + { + _sink(_input.GetValue()); + } + _output.SetOutput(_input.GetValue()); + } + + template + void SinkNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); + emitters::LLVMValue pTrigger = compiler.EnsurePortEmitted(trigger); + std::string prefixedName(compiler.GetNamespacePrefix() + "_" + GetCallbackName()); + auto& module = function.GetModule(); + auto triggerValue = function.ValueAt(pTrigger, 0); + + function.If(emitters::TypedComparison::equals, triggerValue, function.Literal(true), [prefixedName, pInput, &module, &compiler](emitters::IRFunctionEmitter& function) { + // look up our global context object + auto context = module.GlobalPointer(compiler.GetNamespacePrefix() + "_context", emitters::VariableType::Byte); + auto globalContext = function.Load(context); + + // Callback signature: void SinkFunction(void* context, ValueType* array) + const emitters::NamedVariableTypeList parameters = { { "context", emitters::VariableType::BytePointer }, + { "output", emitters::GetPointerType(emitters::GetVariableType()) } }; + module.DeclareFunction(prefixedName, emitters::VariableType::Void, parameters); + + emitters::LLVMFunction pSinkFunction = module.GetFunction(prefixedName); + function.Call(pSinkFunction, { globalContext, function.PointerOffset(pInput, function.Literal(0)) }); + }); + + // Tag the sink function as a callback that is emitted in headers + module.IncludeInCallbackInterface(prefixedName, "SinkNode"); + + // Set output values as well, useful when user code is in a non-event-driven mode + if (!IsScalar(input) && !compiler.GetCompilerOptions().unrollLoops) + { + SetOutputValuesLoop(compiler, function); + } + else + { + SetOutputValuesExpanded(compiler, function); + } + } + + template + void SinkNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newInput = transformer.GetCorrespondingInputs(_input); + const auto& newTrigger = transformer.GetCorrespondingInputs(_trigger); + auto newNode = transformer.AddNode>(newInput, newTrigger, GetShape(), GetCallbackName(), _sink); + transformer.MapNodeOutput(output, newNode->output); + } + + template + utilities::ArchiveVersion SinkNode::GetArchiveVersion() const + { + constexpr utilities::ArchiveVersion sinkNodeShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v6_sink_triggers }; + + return sinkNodeShapeArchiveVersion; + } + + template + bool SinkNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const + { + constexpr utilities::ArchiveVersion sinkNodeNoShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v0_initial }; + constexpr utilities::ArchiveVersion sinkNodeShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v6_sink_triggers }; + + return version >= sinkNodeNoShapeArchiveVersion && version <= sinkNodeShapeArchiveVersion; + } + + template + void SinkNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver[triggerPortName] << _trigger; + archiver["sinkFunctionName"] << GetCallbackName(); + archiver["shape"] << GetShape().ToVector(); + } + + template + void SinkNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + 
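+        // Note: only the callback *name* round-trips through the archive. The
+        // std::function itself cannot be serialized, so after deserialization
+        // _sink stays unset until caller code re-attaches it (see the comment
+        // at the end of this function).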
Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver[triggerPortName] >> _trigger; + + std::string sinkFunctionName; + archiver["sinkFunctionName"] >> sinkFunctionName; + SetCallbackName(sinkFunctionName); + + std::vector shapeVector; + archiver["shape"] >> shapeVector; + SetShape({ shapeVector }); + + // _sink needs to be set separately + } + + template + void SinkNode::SetOutputValuesLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + assert(input.Size() == output.Size()); + + // Concatenate the input ports in a similar way as OutputNodes, + // because SinkNodes are just callback-enabled OutputNodes. + auto input = function.LocalArray(compiler.EnsurePortEmitted(_input)); + auto output = function.LocalArray(compiler.EnsurePortEmitted(_output)); + // check if the output variable is null. + function.If(ell::emitters::TypedComparison::notEquals, output, function.NullPointer(output.value->getType()->getPointerElementType()->getPointerTo()), [input, output, this](emitters::IRFunctionEmitter& function) { + auto size = _input.Size(); + function.For(size, [input, output](emitters::IRFunctionEmitter& function, auto i) { + output[i] = input[i]; + }); + }); + } + + template + void SinkNode::SetOutputValuesExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + compiler.EnsurePortEmitted(input); + emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(output); + + auto numInputs = input.Size(); + assert(numInputs == output.Size()); + + for (size_t i = 0; i < numInputs; ++i) + { + // Concatenate the input ports + emitters::LLVMValue value = compiler.LoadPortElementVariable(input.GetInputElement(i)); + function.SetValueAt(pOutput, function.Literal(static_cast(i)), value); + } + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/SourceNode.h b/libraries/nodes/include/SourceNode.h index eb368007f..a2bd35254 100644 --- a/libraries/nodes/include/SourceNode.h +++ b/libraries/nodes/include/SourceNode.h @@ -128,4 +128,204 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/SourceNode.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + SourceNode::SourceNode() : + SourceNode({}, model::MemoryShape{ 0 }, "", nullptr) + { + } + + template + SourceNode::SourceNode(const model::OutputPort& input, size_t inputVectorSize, const std::string& sourceFunctionName, SourceFunction source) : + SourceNode(input, model::MemoryShape{ static_cast(inputVectorSize) }, sourceFunctionName, source) + { + } + + template + SourceNode::SourceNode(const model::OutputPort& input, const model::MemoryShape& shape, const std::string& sourceFunctionName, SourceFunction source) : + model::SourceNodeBase(_input, _output, sourceFunctionName), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, shape), + _source(source == nullptr ? [](auto&) { return false; } : source) + { + _bufferedSample.resize(shape.NumElements()); + } + + template + SourceNode::SourceNode(const model::OutputPort& input, const model::PortMemoryLayout& layout, const std::string& sourceFunctionName, SourceFunction source) : + model::SourceNodeBase(_input, _output, sourceFunctionName), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, layout), + _source(source == nullptr ? 
[](auto&) { return false; } : source) + { + _bufferedSample.resize(layout.NumElements()); + } + + template + void SourceNode::SetInput(std::vector inputValues) + { + assert(_bufferedSample.size() == inputValues.size()); + _bufferedSample = inputValues; + } + + template + void SourceNode::Compute() const + { + auto sampleTime = _input.GetValue(0); + + if (_source(_bufferedSample)) + { + // Determine if the sample time differs from the current time + auto currentTime = _input.GetValue(1); + if (currentTime > sampleTime) + { + // Interpolate _bufferedSample to match the sample time + Interpolate(currentTime, sampleTime); + } + } + + _bufferedSampleTime = sampleTime; + _output.SetOutput(_bufferedSample); + } + + template + void SourceNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); + compiler.EnsurePortEmitted(output); + auto& module = function.GetModule(); + + // Globals + emitters::Variable* pBufferedSampleTimeVar = module.Variables().AddVariable>(emitters::VariableScope::global, _bufferedSampleTime); + emitters::Variable* pBufferedSampleVar = module.Variables().AddVariable>(emitters::VariableScope::global, output.Size()); + emitters::LLVMValue pBufferedSampleTime = module.EnsureEmitted(*pBufferedSampleTimeVar); + emitters::LLVMValue pBufferedSample = module.EnsureEmitted(*pBufferedSampleVar); + emitters::LLVMValue bufferedSampleTime = function.Load(pBufferedSampleTime); + UNUSED(bufferedSampleTime); + + // Callback function + const emitters::NamedVariableTypeList parameters = { { "context", emitters::VariableType::BytePointer }, + { "input", emitters::GetPointerType(emitters::GetVariableType()) } }; + std::string prefixedName(compiler.GetNamespacePrefix() + "_" + GetCallbackName()); + module.DeclareFunction(prefixedName, emitters::GetVariableType(), parameters); + module.IncludeInCallbackInterface(prefixedName, "SourceNode"); + + emitters::LLVMFunction pSamplingFunction = module.GetFunction(prefixedName); + + // look up our global context object + auto context = module.GlobalPointer(compiler.GetNamespacePrefix() + "_context", emitters::VariableType::Byte); + auto globalContext = function.Load(context); + + // Locals + auto sampleTime = function.ValueAt(pInput, function.Literal(0)); + + // Invoke the callback and optionally interpolate. 
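+        // This lowers to a call of the callback declared above, shaped roughly like
+        //     <prefix>_<CallbackName>(context, bufferedSample)
+        // i.e. a boolean-returning function taking the global context pointer and
+        // the buffered-sample storage. The boolean result (whether a fresh sample
+        // was produced) is currently unused; see the interpolation TODO below.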
+ function.Call(pSamplingFunction, { globalContext, function.PointerOffset(pBufferedSample, 0) }); + + // TODO: Interpolate if there is a sample, and currentTime > sampleTime + // Note: currentTime can be retrieved via currentTime = function.ValueAt(pInput, function.Literal(1)); + + // Set sample values to the output + if (!IsScalar(output) && !compiler.GetCompilerOptions().unrollLoops) + { + SetOutputValuesLoop(compiler, function, pBufferedSample); + } + else + { + SetOutputValuesExpanded(compiler, function, pBufferedSample); + } + + // Update the cached sample time + function.Store(pBufferedSampleTime, sampleTime); + } + + template + void SourceNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, GetShape(), GetCallbackName(), _source); + transformer.MapNodeOutput(output, newNode->output); + } + + template + utilities::ArchiveVersion SourceNode::GetArchiveVersion() const + { + constexpr utilities::ArchiveVersion sourceNodeShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v4_source_sink_shapes }; + + return sourceNodeShapeArchiveVersion; + } + + template + bool SourceNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const + { + constexpr utilities::ArchiveVersion sourceNodeNoShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v0_initial }; + constexpr utilities::ArchiveVersion sourceNodeShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v4_source_sink_shapes }; + + return version >= sourceNodeNoShapeArchiveVersion && version <= sourceNodeShapeArchiveVersion; + } + + template + void SourceNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver[defaultOutputPortName] << _output; + archiver["sourceFunctionName"] << GetCallbackName(); + archiver["shape"] << GetShape().ToVector(); + } + + template + void SourceNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + archiver[defaultOutputPortName] >> _output; + + std::string sourceFunctionName; + archiver["sourceFunctionName"] >> sourceFunctionName; + SetCallbackName(sourceFunctionName); + + std::vector shapeVector; + archiver["shape"] >> shapeVector; + SetShape({ shapeVector }); + } + + template + void SourceNode::Interpolate(TimeTickType /*originalTime*/, TimeTickType /*newTime*/) const + { + // Default to pass-through (derived classes will override). 
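+        // An override might, for example, linearly blend the buffered sample
+        // between the two time ticks. A hypothetical sketch (nothing here is
+        // prescribed by the base class):
+        //     auto alpha = ...; // fraction of the interval between the two ticks
+        //     blended = previousSample * (1 - alpha) + _bufferedSample * alpha;
+        // where previousSample is state a subclass would have to keep itself.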
+ } + + template + void SourceNode::SetOutputValuesLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, emitters::LLVMValue sample) + { + emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(output); + + auto numValues = output.Size(); + function.For(numValues, [sample, pOutput](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { + auto value = function.ValueAt(sample, i); + function.SetValueAt(pOutput, i, value); + }); + } + + template + void SourceNode::SetOutputValuesExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, emitters::LLVMValue sample) + { + emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(output); + + auto numValues = output.Size(); + for (size_t i = 0; i < numValues; ++i) + { + auto value = function.ValueAt(sample, i); + function.SetValueAt(pOutput, function.Literal(static_cast(i)), value); + } + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/SquaredEuclideanDistanceNode.h b/libraries/nodes/include/SquaredEuclideanDistanceNode.h index 322ad5a51..84e46bc6b 100644 --- a/libraries/nodes/include/SquaredEuclideanDistanceNode.h +++ b/libraries/nodes/include/SquaredEuclideanDistanceNode.h @@ -22,11 +22,11 @@ #include #include -#include #include +#include -#include #include +#include namespace ell { @@ -103,4 +103,134 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/SquaredEuclideanDistanceNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + SquaredEuclideanDistanceNode::SquaredEuclideanDistanceNode() : + Node({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 1), + _vectorsAsMatrix(0, 0) + { + } + + template + SquaredEuclideanDistanceNode::SquaredEuclideanDistanceNode(const model::OutputPort& input, const math::Matrix& vectorsAsMatrix) : + Node({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, vectorsAsMatrix.NumRows()), + _vectorsAsMatrix(vectorsAsMatrix) + { + if (input.Size() != vectorsAsMatrix.NumColumns()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "SquaredEuclideanDistanceNode: input size must match the number of columns in the vectorsAsMatrix"); + } + } + + template + void SquaredEuclideanDistanceNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + + math::MatrixArchiver::Write(_vectorsAsMatrix, "vectorsAsMatrix", archiver); + archiver[defaultInputPortName] << _input; + archiver[defaultOutputPortName] << _output; + } + + template + void SquaredEuclideanDistanceNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + + math::MatrixArchiver::Read(_vectorsAsMatrix, "vectorsAsMatrix", archiver); + archiver[defaultInputPortName] >> _input; + archiver[defaultOutputPortName] >> _output; + } + + template + void SquaredEuclideanDistanceNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, _vectorsAsMatrix); + transformer.MapNodeOutput(output, newNode->output); + } + + // We compute the distance (P - V)^2 as P^2 - 2 * P * V + V^2 where P is the input point and V is the set of vectors + template + bool SquaredEuclideanDistanceNode::Refine(model::ModelTransformer& transformer) const + { + const 
auto& inputPortElements = transformer.GetCorrespondingInputs(_input); + + // P^2 => scalar value + auto inputNorm2SquaredNode = transformer.AddNode>(inputPortElements); + + // -2 * P * V => row-wise vector + auto vectorsAsMatrix = _vectorsAsMatrix; + vectorsAsMatrix.Transform([](double d) { return -2.0 * d; }); + auto productNode = transformer.AddNode>(inputPortElements, vectorsAsMatrix); + + // Will hold the scalar value of P^2 for each row in the matrix + model::PortElements inputNorm2SquaredNodeOutputs; + // V^2 => row-wise vector of Norm-2 squared values of each vector in _vectorsAsMatrix + model::PortElements vectorNorm2SquaredConstantNodeOutputs; + for (size_t index = 0; index < _vectorsAsMatrix.NumRows(); ++index) + { + inputNorm2SquaredNodeOutputs.Append(inputNorm2SquaredNode->output); + + auto matrixRow = _vectorsAsMatrix.GetRow(index); + auto rowNorm2SquaredConstantNode = transformer.AddNode>(matrixRow.Norm2Squared()); + vectorNorm2SquaredConstantNodeOutputs.Append(rowNorm2SquaredConstantNode->output); + } + + // Add the three node outputs: + // * inputNorm2SquaredNodeOutputs (A) + // * vectorNorm2SquaredConstantNodeOutputs (B) + // * productNode->output (C) + // and map it to output node + auto& A = inputNorm2SquaredNodeOutputs; + auto& B = vectorNorm2SquaredConstantNodeOutputs; + auto& C = productNode->output; + auto aPlusB = transformer.AddNode>(A, B, emitters::BinaryOperationType::add); + auto aPlusBPlusC = transformer.AddNode>(aPlusB->output, C, emitters::BinaryOperationType::add); + transformer.MapNodeOutput(output, aPlusBPlusC->output); + + return true; + } + + template + void SquaredEuclideanDistanceNode::Compute() const + { + math::ColumnVector input(_input.Size()); + for (size_t index = 0; index < _input.Size(); ++index) + { + input[index] = _input[index]; + } + + math::ColumnVector result(_vectorsAsMatrix.NumRows()); + + auto norm1sq = input.Norm2Squared(); + + // result = -2 * _v * input + math::MultiplyScaleAddUpdate(-2.0, _vectorsAsMatrix, input, 0.0, result); + + for (size_t r = 0; r < _vectorsAsMatrix.NumRows(); r++) + { + result[r] += norm1sq + _vectorsAsMatrix.GetRow(r).Norm2Squared(); + } + + _output.SetOutput(result.ToArray()); + } + + template + SquaredEuclideanDistanceNode* AddNodeToModelTransformer(const model::PortElements& input, math::ConstMatrixReference vectorsAsMatrix, model::ModelTransformer& transformer) + { + return transformer.AddNode(input, vectorsAsMatrix); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/SumNode.h b/libraries/nodes/include/SumNode.h index 342136370..57f71a227 100644 --- a/libraries/nodes/include/SumNode.h +++ b/libraries/nodes/include/SumNode.h @@ -79,4 +79,187 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/SumNode.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace nodes +{ + template + SumNode::SumNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 1) + { + } + + template + SumNode::SumNode(const model::OutputPort& input) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, 1) + { + } + + template + void SumNode::Compute() const + { + ValueType result = 0; + for (size_t index = 0; index < _input.Size(); ++index) + { + auto v = _input[index]; + result += v; + } + _output.SetOutput({ result }); + }; + + template + void 
SumNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void SumNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + if (!compiler.GetCompilerOptions().unrollLoops) + { + size_t vectorSize = compiler.GetCompilerOptions().vectorWidth; + bool vectorize = compiler.GetCompilerOptions().allowVectorInstructions && (input.Size() > vectorSize); + if (vectorize) + { + CompileVectorizedLoop(compiler, function); + } + else + { + CompileLoop(compiler, function); + } + } + else + { + CompileExpanded(compiler, function); + } + } + + template + void SumNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + auto input = function.LocalArray(compiler.EnsurePortEmitted(_input)); + auto output = function.LocalArray(compiler.EnsurePortEmitted(_output)); + + function.StoreZero(output); + + const int size = _input.Size(); + constexpr int blockSize = 4; + bool unrollLoop = size > 4 * blockSize; // silly heuristic + if (unrollLoop) + { + const int numBlocks = size / blockSize; + function.For(numBlocks, [input, output, blockSize](emitters::IRFunctionEmitter& function, auto i) { + auto blockStart = blockSize * i; + for (int innerIndex = 0; innerIndex < blockSize; ++innerIndex) + { + emitters::IRLocalScalar value = input[blockStart + innerIndex]; + function.OperationAndUpdate(output, emitters::GetAddForValueType(), value); + } + }); + + // epilogue + const int epilogueSize = size - (blockSize * numBlocks); + if (epilogueSize > 0) + { + function.For(epilogueSize, [input, output](emitters::IRFunctionEmitter& function, auto i) { + emitters::IRLocalScalar value = input[i]; + function.OperationAndUpdate(output, emitters::GetAddForValueType(), value); + }); + } + } + else + { + function.For(size, [input, output](emitters::IRFunctionEmitter& function, auto i) { + emitters::IRLocalScalar value = input[i]; + function.OperationAndUpdate(output, emitters::GetAddForValueType(), value); + }); + } + } + + template + void SumNode::CompileVectorizedLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + const int size = _input.Size(); + const int vectorSize = compiler.GetCompilerOptions().vectorWidth; + assert(size >= vectorSize); + + emitters::LLVMValue input = compiler.EnsurePortEmitted(_input); + emitters::LLVMValue output = compiler.EnsurePortEmitted(_output); + + // Get LLVM types + auto& emitter = function.GetEmitter(); + auto elementType = emitter.Type(emitters::GetVariableType()); + DEBUG_USED(elementType); + assert(llvm::VectorType::isValidElementType(elementType) && "Invalid element type for LLVM vector"); + auto vectorType = emitter.VectorType(emitters::GetVariableType(), vectorSize); + auto vectorPointerType = vectorType->getPointerTo(); + + // cast input to pointer-to-vector + auto inputVector = function.CastPointer(input, vectorPointerType); + + emitters::LLVMValue vectorAccumVar = function.Variable(vectorType, "vecAccum"); + function.Store(vectorAccumVar, emitters::FillVector(function, vectorType, 0)); + + const int numBlocks = size / vectorSize; + function.For(numBlocks, [inputVector, vectorAccumVar](emitters::IRFunctionEmitter& function, auto blockIndex) { + auto value = function.ValueAt(inputVector, blockIndex); + function.OperationAndUpdate(vectorAccumVar, emitters::GetAddForValueType(), value); + }); + + 
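+        // At this point vecAccum holds vectorSize lane-wise partial sums. For
+        // example, with vectorWidth = 4 and a 10-element input, numBlocks = 2 and
+        // the lanes hold (x0+x4), (x1+x5), (x2+x6), (x3+x7); the horizontal sum
+        // below collapses the lanes and the scalar epilogue adds x8 and x9.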
// Accumulate horizontal sum into output + auto sum = emitters::HorizontalVectorSum(function, function.Load(vectorAccumVar)); + + // epilogue + const int epilogueSize = size - (vectorSize * numBlocks); + if (epilogueSize > 0) + { + for (int epilogueIndex = vectorSize * numBlocks; epilogueIndex < size; ++epilogueIndex) + { + emitters::LLVMValue pValue = function.ValueAt(input, function.Literal(epilogueIndex)); + sum = function.Operator(emitters::GetAddForValueType(), sum, pValue); + } + } + function.Store(output, sum); + } + + template + void SumNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + function.StoreZero(pResult); + for (size_t i = 0; i < input.Size(); ++i) + { + auto pValue = compiler.LoadPortElementVariable(input.GetInputElement(i)); + function.OperationAndUpdate(pResult, emitters::GetAddForValueType(), pValue); + } + } + + template + void SumNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + } + + template + void SumNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/TypeCastNode.h b/libraries/nodes/include/TypeCastNode.h index 29340e22b..6d8ba1d86 100644 --- a/libraries/nodes/include/TypeCastNode.h +++ b/libraries/nodes/include/TypeCastNode.h @@ -73,4 +73,111 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/TypeCastNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + TypeCastNode::TypeCastNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0){}; + + template + TypeCastNode::TypeCastNode(const model::OutputPort& input) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, input.Size()){}; + + template + void TypeCastNode::Compute() const + { + auto size = _output.Size(); + std::vector outputValues(size); + for (size_t index = 0; index < size; ++index) + { + outputValues[index] = static_cast(_input[index]); + } + _output.SetOutput(outputValues); + } + + template + void TypeCastNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements); + transformer.MapNodeOutput(output, newNode->output); + } + + template + void TypeCastNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + // The IR compiler currently implements bools using integers. We'll just use the already created variable. + auto inputType = emitters::GetVariableType(); + auto outputType = emitters::GetVariableType(); + + // no-op case + if (inputType == outputType) + { + emitters::Variable* elementVar = compiler.GetVariableForPort(input.GetReferencedPort()); + compiler.SetVariableForPort(output, elementVar); // The types are the same, so this is a no-op. 
Just set the output variable to be the same as the input variable + return; + } + + if (!compiler.GetCompilerOptions().unrollLoops) + { + CompileLoop(compiler, function); + } + else + { + CompileExpanded(compiler, function); + } + } + + template + void TypeCastNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + auto count = input.Size(); + emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + function.For(count, [pInput, pResult](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { + emitters::LLVMValue inputValue = function.ValueAt(pInput, i); + emitters::LLVMValue castElement = function.CastValue(inputValue); + function.SetValueAt(pResult, i, castElement); + }); + } + + template + void TypeCastNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + for (size_t i = 0; i < input.Size(); ++i) + { + emitters::LLVMValue inputValue = compiler.LoadPortElementVariable(input.GetInputElement(i)); + emitters::LLVMValue castElement = function.CastValue(inputValue); + function.SetValueAt(pResult, function.Literal((int)i), castElement); + } + } + + template + void TypeCastNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + } + + template + void TypeCastNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + _output.SetSize(_input.Size()); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/UnaryOperationNode.h b/libraries/nodes/include/UnaryOperationNode.h index 63bbba171..c92819786 100644 --- a/libraries/nodes/include/UnaryOperationNode.h +++ b/libraries/nodes/include/UnaryOperationNode.h @@ -93,4 +93,291 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/UnaryOperationNode.tcc" \ No newline at end of file +#pragma region implementation + +#define ADD_TO_STRING_ENTRY(NAMESPACE, OPERATOR) \ + case NAMESPACE::OPERATOR: \ + return #OPERATOR; +#define BEGIN_FROM_STRING if (false) +#define ADD_FROM_STRING_ENTRY(NAMESPACE, OPERATOR) else if (name == #OPERATOR) return NAMESPACE::OPERATOR + +namespace ell +{ +namespace nodes +{ + namespace UnaryOperations + { + inline std::string to_string(emitters::UnaryOperationType op) + { + switch (op) + { + ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, none); + ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, sqrt); + ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, logicalNot); + ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, tanh); + ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, exp); + ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, square); + ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, log); + + default: + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown unary operation"); + } + } + + inline emitters::UnaryOperationType from_string(std::string name) + { + BEGIN_FROM_STRING; + ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, none); + ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, sqrt); + ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, logicalNot); + ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, tanh); + ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, exp); + 
ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, square); + ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, log); + + throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown unary operation"); + } + + template + ValueType Sqrt(ValueType a) + { + return std::sqrt(a); + } + + template <> + inline bool Sqrt(bool x) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking sqrt of a boolean value"); + } + + template + ValueType LogicalNot(ValueType a) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking not of a non-boolean value"); + } + + template <> + inline bool LogicalNot(bool x) + { + return !x; + } + + template + ValueType Tanh(ValueType a) + { + return std::tanh(a); + } + + template <> + inline bool Tanh(bool x) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking tanh of a boolean value"); + } + + template + ValueType Exp(ValueType a) + { + return std::exp(a); + } + + template <> + inline bool Exp(bool x) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking exp of a boolean value"); + } + + template + ValueType Square(ValueType a) + { + return a * a; + } + + template <> + inline bool Square(bool) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking square of a boolean value"); + } + + template + ValueType Log(ValueType a) + { + return std::log(a); + } + + template <> + inline bool Log(bool) + { + throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking log of a boolean value"); + } + } // namespace UnaryOperations + + template + UnaryOperationNode::UnaryOperationNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, defaultOutputPortName, 0), + _operation(emitters::UnaryOperationType::none) + { + } + + template + UnaryOperationNode::UnaryOperationNode(const model::OutputPort& input, emitters::UnaryOperationType operation) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, _input.Size()), + _operation(operation) + { + } + + template + template + std::vector UnaryOperationNode::ComputeOutput(Operation&& function) const + { + auto output = std::vector(_input.Size()); + for (size_t index = 0; index < _input.Size(); index++) + { + output[index] = function(_input[index]); + } + return output; + } + + template + void UnaryOperationNode::Compute() const + { + std::vector output; + switch (_operation) + { + case emitters::UnaryOperationType::sqrt: + output = ComputeOutput(UnaryOperations::Sqrt); + break; + case emitters::UnaryOperationType::logicalNot: + output = ComputeOutput(UnaryOperations::LogicalNot); + break; + case emitters::UnaryOperationType::exp: + output = ComputeOutput(UnaryOperations::Exp); + break; + case emitters::UnaryOperationType::tanh: + output = ComputeOutput(UnaryOperations::Tanh); + break; + case emitters::UnaryOperationType::square: + output = ComputeOutput(UnaryOperations::Square); + break; + case emitters::UnaryOperationType::log: + output = ComputeOutput(UnaryOperations::Log); + break; + default: + throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Unknown operation type"); + } + _output.SetOutput(output); + }; + + template + void UnaryOperationNode::Copy(model::ModelTransformer& transformer) const + 
{ + const auto& newPortElements = transformer.GetCorrespondingInputs(_input); + auto newNode = transformer.AddNode>(newPortElements, _operation); + transformer.MapNodeOutput(output, newNode->output); + } + + template + emitters::LLVMFunction UnaryOperationNode::GetOperator(emitters::IRFunctionEmitter& function) const + { + switch (this->GetOperation()) + { + case emitters::UnaryOperationType::sqrt: + return function.GetModule().GetRuntime().GetSqrtFunction(); + case emitters::UnaryOperationType::exp: + return function.GetModule().GetRuntime().GetExpFunction(); + case emitters::UnaryOperationType::log: + return function.GetModule().GetRuntime().GetLogFunction(); + case emitters::UnaryOperationType::logicalNot: + { + auto& module = function.GetModule(); + auto& f = module.BeginFunction("logicalNot", emitters::GetVariableType(), { { "value", emitters::GetVariableType() } }); + auto args = f.Arguments().begin(); + llvm::Argument& val = *args; + f.Return(f.LogicalNot(&val)); + module.EndFunction(); + return f.GetFunction(); + } + case emitters::UnaryOperationType::square: + { + auto& module = function.GetModule(); + auto& f = module.BeginFunction("square", emitters::GetVariableType(), { { "value", emitters::GetVariableType() } }); + auto args = f.Arguments().begin(); + llvm::Argument& val = *args; + f.Return(f.Operator(emitters::GetMultiplyForValueType(), &val, &val)); + module.EndFunction(); + return f.GetFunction(); + } + case emitters::UnaryOperationType::tanh: + return function.GetModule().GetRuntime().GetTanhFunction(); + case emitters::UnaryOperationType::none: + default: + throw emitters::EmitterException(emitters::EmitterError::unaryOperationNotSupported); + } + } + + template + void UnaryOperationNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + if (!compiler.GetCompilerOptions().unrollLoops) + { + CompileLoop(compiler, function); + } + else + { + CompileExpanded(compiler, function); + } + } + + template + void UnaryOperationNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + // Loop version broken + auto count = input.Size(); + emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + function.For(count, [pInput, pResult, this](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { + emitters::LLVMValue inputValue = function.ValueAt(pInput, i); + emitters::LLVMValue pOpResult = function.Call(GetOperator(function), { inputValue }); + function.SetValueAt(pResult, i, pOpResult); + }); + } + + template + void UnaryOperationNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) + { + emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); + + for (size_t i = 0; i < input.Size(); ++i) + { + emitters::LLVMValue inputValue = compiler.LoadPortElementVariable(input.GetInputElement(i)); + emitters::LLVMValue pOpResult = function.Call(GetOperator(function), { inputValue }); + function.SetValueAt(pResult, function.Literal((int)i), pOpResult); + } + } + + template + void UnaryOperationNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInputPortName] << _input; + archiver["operation"] << UnaryOperations::to_string(_operation); + } + + template + void UnaryOperationNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInputPortName] >> _input; + std::string 
operation; + archiver["operation"] >> operation; + _operation = UnaryOperations::from_string(operation); + _output.SetSize(_input.Size()); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/ValueSelectorNode.h b/libraries/nodes/include/ValueSelectorNode.h index c6fa936dd..8b49efe21 100644 --- a/libraries/nodes/include/ValueSelectorNode.h +++ b/libraries/nodes/include/ValueSelectorNode.h @@ -76,4 +76,79 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/ValueSelectorNode.tcc" +#pragma region implementation + +namespace ell +{ +namespace nodes +{ + template + ValueSelectorNode::ValueSelectorNode() : + Node({ &_condition, &_input1, &_input2 }, { &_output }), + _condition(this, {}, conditionPortName), + _input1(this, {}, defaultInput1PortName), + _input2(this, {}, defaultInput2PortName), + _output(this, defaultOutputPortName, 0) + { + } + + template + ValueSelectorNode::ValueSelectorNode(const model::OutputPort& condition, const model::OutputPort& input1, const model::OutputPort& input2) : + Node({ &_condition, &_input1, &_input2 }, { &_output }), + _condition(this, condition, conditionPortName), + _input1(this, input1, defaultInput1PortName), + _input2(this, input2, defaultInput2PortName), + _output(this, defaultOutputPortName, input1.Size()) + { + if (condition.Size() != 1) + { + throw ell::utilities::Exception("Error: Condition must be 1-D signal"); + } + + if (input1.Size() != input2.Size()) + { + throw ell::utilities::Exception("Error: input values must be same dimension"); + } + }; + + template + void ValueSelectorNode::Compute() const + { + bool cond = _condition[0]; + _output.SetOutput(cond ? _input1.GetValue() : _input2.GetValue()); + }; + + template + void ValueSelectorNode::WriteToArchive(utilities::Archiver& archiver) const + { + Node::WriteToArchive(archiver); + archiver[defaultInput1PortName] << _input1; + archiver[defaultInput2PortName] << _input2; + archiver[conditionPortName] << _condition; + } + + template + void ValueSelectorNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + Node::ReadFromArchive(archiver); + archiver[defaultInput1PortName] >> _input1; + archiver[defaultInput2PortName] >> _input2; + archiver[conditionPortName] >> _condition; + _output.SetSize(_input1.Size()); + } + + template + void ValueSelectorNode::Copy(model::ModelTransformer& transformer) const + { + const auto& newCondition = transformer.GetCorrespondingInputs(_condition); + const auto& newPortElements1 = transformer.GetCorrespondingInputs(_input1); + const auto& newPortElements2 = transformer.GetCorrespondingInputs(_input2); + + auto newNode = transformer.AddNode>(newCondition, newPortElements1, newPortElements2); + + transformer.MapNodeOutput(output, newNode->output); + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/include/VoiceActivityDetectorNode.h b/libraries/nodes/include/VoiceActivityDetectorNode.h index ffc9a36bf..2b504bd69 100644 --- a/libraries/nodes/include/VoiceActivityDetectorNode.h +++ b/libraries/nodes/include/VoiceActivityDetectorNode.h @@ -99,4 +99,40 @@ namespace nodes } // namespace nodes } // namespace ell -#include "../tcc/VoiceActivityDetectorNode.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace nodes +{ + template + VoiceActivityDetectorNode::VoiceActivityDetectorNode() : + CompilableNode({ &_input }, { &_output }), + _input(this, {}, defaultInputPortName), + _output(this, 
defaultOutputPortName, 1) + { + } + + template + VoiceActivityDetectorNode::VoiceActivityDetectorNode(const model::OutputPort& input, + double sampleRate, + double frameDuration, + double tauUp, + double tauDown, + double largeInput, + double gainAtt, + double thresholdUp, + double thresholdDown, + double levelThreshold) : + CompilableNode({ &_input }, { &_output }), + _input(this, input, defaultInputPortName), + _output(this, defaultOutputPortName, 1), + _vad(sampleRate, input.Size(), frameDuration, tauUp, tauDown, largeInput, gainAtt, thresholdUp, thresholdDown, levelThreshold) + { + } +} // namespace nodes +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/nodes/src/MatrixVectorMultiplyNode.cpp b/libraries/nodes/src/MatrixVectorMultiplyNode.cpp index 62e046581..eda13cdae 100644 --- a/libraries/nodes/src/MatrixVectorMultiplyNode.cpp +++ b/libraries/nodes/src/MatrixVectorMultiplyNode.cpp @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// // // Project: Embedded Learning Library (ELL) -// File: MatrixVectorMultiplyNode.tcc (nodes) +// File: MatrixVectorMultiplyNode.cpp (nodes) // Authors: Chuck Jacobs // //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/libraries/nodes/tcc/AccumulatorNode.tcc b/libraries/nodes/tcc/AccumulatorNode.tcc deleted file mode 100644 index 2b1721f25..000000000 --- a/libraries/nodes/tcc/AccumulatorNode.tcc +++ /dev/null @@ -1,113 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: AccumulatorNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - AccumulatorNode::AccumulatorNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0) - { - } - - template - AccumulatorNode::AccumulatorNode(const model::OutputPort& input) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, _input.Size()) - { - auto dimension = input.Size(); - _accumulator = std::vector(dimension); - } - - template - void AccumulatorNode::Compute() const - { - for (size_t index = 0; index < _input.Size(); ++index) - { - _accumulator[index] += _input[index]; - } - _output.SetOutput(_accumulator); - }; - - template - void AccumulatorNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void AccumulatorNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - static_assert(!std::is_same(), "Cannot instantiate boolean accumulator nodes"); - assert(GetPortVariableType(input) == GetPortVariableType(output)); - - // Allocate a global variable to accumulate the input - emitters::Variable* pAccumulatorVar = function.GetModule().Variables().AddVariable>(emitters::VariableScope::global, output.Size()); - emitters::LLVMValue accumulator = function.GetModule().EnsureEmitted(*pAccumulatorVar); - - if (!compiler.GetCompilerOptions().unrollLoops) - { - CompileLoop(compiler, function, accumulator); - } - else - { - 
-
-    template <typename ValueType>
-    void AccumulatorNode<ValueType>::Copy(model::ModelTransformer& transformer) const
-    {
-        const auto& newPortElements = transformer.GetCorrespondingInputs(_input);
-        auto newNode = transformer.AddNode<AccumulatorNode<ValueType>>(newPortElements);
-        transformer.MapNodeOutput(output, newNode->output);
-    }
-
-    template <typename ValueType>
-    void AccumulatorNode<ValueType>::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
-    {
-        static_assert(!std::is_same<ValueType, bool>(), "Cannot instantiate boolean accumulator nodes");
-        assert(GetPortVariableType(input) == GetPortVariableType(output));
-
-        // Allocate a global variable to accumulate the input
-        emitters::Variable* pAccumulatorVar = function.GetModule().Variables().AddVariable<emitters::InitializedVectorVariable<ValueType>>(emitters::VariableScope::global, output.Size());
-        emitters::LLVMValue accumulator = function.GetModule().EnsureEmitted(*pAccumulatorVar);
-
-        if (!compiler.GetCompilerOptions().unrollLoops)
-        {
-            CompileLoop(compiler, function, accumulator);
-        }
-        else
-        {
-            CompileExpanded(compiler, function, accumulator);
-        }
-    }
-
-    template <typename ValueType>
-    void AccumulatorNode<ValueType>::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, emitters::LLVMValue accumulator)
-    {
-        emitters::LLVMValue inputVector = compiler.EnsurePortEmitted(input);
-        emitters::LLVMValue result = compiler.EnsurePortEmitted(output);
-
-        function.VectorOperator(emitters::GetAddForValueType<ValueType>(), output.Size(), accumulator, inputVector, [&accumulator, &result, &function](emitters::LLVMValue i, emitters::LLVMValue value) {
-            function.SetValueAt(accumulator, i, value);
-            function.SetValueAt(result, i, value);
-        });
-    }
-
-    template <typename ValueType>
-    void AccumulatorNode<ValueType>::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, emitters::LLVMValue accumulator)
-    {
-        emitters::LLVMValue result = compiler.EnsurePortEmitted(output);
-        for (size_t index = 0; index < output.Size(); ++index)
-        {
-            emitters::LLVMValue inputValue = compiler.LoadPortElementVariable(input.GetInputElement(index));
-            emitters::LLVMValue accumValue = function.ValueAt(accumulator, function.Literal((int)index));
-            emitters::LLVMValue sum = function.Operator(emitters::GetAddForValueType<ValueType>(), inputValue, accumValue);
-            function.SetValueAt(accumulator, function.Literal((int)index), sum);
-            function.SetValueAt(result, function.Literal((int)index), sum);
-        }
-    }
-
-    template <typename ValueType>
-    void AccumulatorNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
-    {
-        Node::WriteToArchive(archiver);
-        archiver[defaultInputPortName] << _input;
-    }
-
-    template <typename ValueType>
-    void AccumulatorNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
-    {
-        Node::ReadFromArchive(archiver);
-        archiver[defaultInputPortName] >> _input;
-
-        auto dimension = _input.Size();
-        _accumulator = std::vector<ValueType>(dimension);
-        _output.SetSize(dimension);
-    }
-} // namespace nodes
-} // namespace ell
diff --git a/libraries/nodes/tcc/BinaryFunctionNode.tcc b/libraries/nodes/tcc/BinaryFunctionNode.tcc
deleted file mode 100644
index 77fb00f67..000000000
--- a/libraries/nodes/tcc/BinaryFunctionNode.tcc
+++ /dev/null
@@ -1,253 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     BinaryFunctionNode.tcc (nodes)
-//  Authors:  Lisa Ong
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace ell
-{
-namespace nodes
-{
-    template <typename ValueType, typename FunctionType>
-    BinaryFunctionNode<ValueType, FunctionType>::BinaryFunctionNode() :
-        CompilableNode({ &_input1, &_input2 }, { &_output }),
-        _input1(this, {}, defaultInput1PortName),
-        _input2(this, {}, defaultInput2PortName),
-        _output(this, defaultOutputPortName, 0),
-        _paddingValue(0)
-    {
-    }
-
-    template <typename ValueType, typename FunctionType>
-    BinaryFunctionNode<ValueType, FunctionType>::BinaryFunctionNode(const model::OutputPort<ValueType>& input1, const model::OutputPort<ValueType>& input2, FunctionType function, ValueType padding) :
-        BinaryFunctionNode(input1, input2, input1.GetMemoryLayout(), function, padding)
-    {
-    }
-
-    template <typename ValueType, typename FunctionType>
-    BinaryFunctionNode<ValueType, FunctionType>::BinaryFunctionNode(const model::OutputPort<ValueType>& input1, const model::OutputPort<ValueType>& input2, const model::PortMemoryLayout& layout, FunctionType function, ValueType padding) :
-        BinaryFunctionNode(input1, input2, input1.GetMemoryLayout(), input1.GetMemoryLayout(), function, padding)
-    {
-    }
-
-    template <typename ValueType, typename FunctionType>
-    BinaryFunctionNode<ValueType, FunctionType>::BinaryFunctionNode(const model::OutputPort<ValueType>& input1, const model::OutputPort<ValueType>& input2, const model::PortMemoryLayout& inputLayout, const model::PortMemoryLayout& outputLayout, FunctionType function, ValueType padding) :
-        CompilableNode({ &_input1, &_input2 }, { &_output }),
-        _input1(this, input1, defaultInput1PortName),
-        _input2(this, input2, defaultInput2PortName),
-        _inputLayout(inputLayout),
-        _output(this, defaultOutputPortName, outputLayout),
-        _function(std::move(function)),
-        _paddingValue(padding)
-    {
-        if (input1.Size() != input2.Size())
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Input sizes must match");
-        }
-
-        if (inputLayout.GetActiveSize() != outputLayout.GetActiveSize())
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument,
-                                            ell::utilities::FormatString("Input 1 active area size %d doesn't match input 2 active area size %d on BinaryFunctionNode %s",
-                                                                         inputLayout.GetActiveSize().NumElements(),
-                                                                         outputLayout.GetActiveSize().NumElements(),
-                                                                         GetId().ToString().c_str()));
-        }
-    }
-
-    template <typename ValueType, typename FunctionType>
-    void BinaryFunctionNode<ValueType, FunctionType>::Compute() const
-    {
-        auto outputLayout = _output.GetMemoryLayout();
-        auto outputSize = outputLayout.GetExtent().NumElements();
-        auto output = std::vector<ValueType>(outputSize);
-
-        const size_t prevInputOffset = 0;
-        const size_t prevOutputOffset = 0;
-        ComputeDimensionLoop(0, output, prevInputOffset, prevOutputOffset);
-
-        _output.SetOutput(output);
-    }
-
-    //
-    // Arbitrary-depth nested loops are generated recursively. The ComputeDimensionLoop
-    // function emits `numDimensions` nested loops of the form:
-    //
-    // for(iz = 0; iz < sz; ++iz)
-    // {
-    //     zOffset = (iz+offset[2]) * stride[2];
-    //     for(iy = 0; iy < sy; ++iy)
-    //     {
-    //         yOffset = zOffset + (iy+offset[1]) * stride[1];
-    //         for(ix = 0; ix < sx; ++ix)
-    //         {
-    //             offset = yOffset + (ix+offset[0]) * stride[0];
-    //             x = arr[offset];
-    //             val = f(x);
-    //             output[offset] = val;
-    //         }
-    //     }
-    // }
-    //
-
-    template <typename ValueType, typename FunctionType>
-    void BinaryFunctionNode<ValueType, FunctionType>::ComputeDimensionLoop(size_t dimension,
-                                                                           std::vector<ValueType>& output,
-                                                                           size_t prevInputDimensionOffset,
-                                                                           size_t prevOutputDimensionOffset) const
-    {
-        auto outputLayout = _output.GetMemoryLayout();
-        const auto numDimensions = _inputLayout.NumDimensions();
-        auto&& inputStride = _inputLayout.GetExtent();
-        auto&& inputOffset = _inputLayout.GetOffset();
-        auto&& inputSize = _inputLayout.GetActiveSize();
-        auto&& outputOffset = outputLayout.GetOffset();
-        auto&& outputStride = outputLayout.GetExtent();
-
-        for (int loopIndex = 0; loopIndex < inputSize[dimension]; ++loopIndex)
-        {
-            // offset within start of this dimension = (loopIndex + offset[dimension])
-            auto thisInputDimensionInternalOffset = loopIndex + inputOffset[dimension];
-            auto thisOutputDimensionInternalOffset = loopIndex + outputOffset[dimension];
-
-            size_t thisInputDimensionOffset = thisInputDimensionInternalOffset;
-            size_t thisOutputDimensionOffset = thisOutputDimensionInternalOffset;
-            if (dimension != 0)
-            {
-                thisInputDimensionOffset += prevInputDimensionOffset * inputStride[dimension];
-                thisOutputDimensionOffset += prevOutputDimensionOffset * outputStride[dimension];
-            }
-
-            if (static_cast<int>(dimension) < numDimensions - 1)
-            {
-                // Recursive call to emit nested loop
-                ComputeDimensionLoop(dimension + 1, output, thisInputDimensionOffset, thisOutputDimensionOffset);
-            }
-            else
-            {
-                // We're in the innermost loop --- compute the value
-                auto value1 = _input1[thisInputDimensionOffset];
-                auto value2 = _input2[thisInputDimensionOffset];
-                auto outputValue = _function.Compute(value1, value2);
-                output[thisOutputDimensionOffset] = outputValue;
-            }
-        }
-    }
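
[Editor's note: illustrative aside, not part of the diff.] ComputeDimensionLoop flattens an N-D index
into a padded buffer by folding, at each dimension d, offset = (i_d + offset[d]) + previousOffset * extent[d].
A stand-alone sketch of that arithmetic with worked numbers (all names illustrative):

    #include <cstddef>
    #include <vector>

    // Flatten an N-D index the same way ComputeDimensionLoop chains its offsets.
    std::size_t FlattenIndex(const std::vector<int>& index,
                             const std::vector<int>& offset,
                             const std::vector<int>& extent)
    {
        std::size_t result = 0;
        for (std::size_t d = 0; d < index.size(); ++d)
        {
            result = result * extent[d] + (index[d] + offset[d]);
        }
        return result;
    }

    // e.g. a 2x3 active region stored inside a 4x5 allocation with offset {1,1}
    // (one element of padding on each border): element {0,2} of the active region
    // lands at (0+1)*5 + (2+1) == 8 in the flat 20-element buffer.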
-
-    template <typename ValueType, typename FunctionType>
-    void BinaryFunctionNode<ValueType, FunctionType>::Compile(model::IRMapCompiler& compiler,
-                                                              emitters::IRFunctionEmitter& function)
-    {
-        emitters::LLVMValue pInput1 = compiler.EnsurePortEmitted(input1);
-        emitters::LLVMValue pInput2 = compiler.EnsurePortEmitted(input2);
-        emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output, _paddingValue);
-
-        // Call recursive function to emit nested loops
-        emitters::LLVMValue prevInputDimensionOffset = nullptr;
-        emitters::LLVMValue prevOutputDimensionOffset = nullptr;
-        EmitComputeDimensionLoop(compiler, function, 0, pInput1, pInput2, pResult, prevInputDimensionOffset, prevOutputDimensionOffset);
-    }
-
-    template <typename ValueType, typename FunctionType>
-    void BinaryFunctionNode<ValueType, FunctionType>::EmitComputeDimensionLoop(model::IRMapCompiler& compiler,
-                                                                               emitters::IRFunctionEmitter& function,
-                                                                               size_t dimension,
-                                                                               emitters::LLVMValue input1,
-                                                                               emitters::LLVMValue input2,
-                                                                               emitters::LLVMValue output,
-                                                                               emitters::LLVMValue prevInputDimensionOffset,
-                                                                               emitters::LLVMValue prevOutputDimensionOffset) const
-    {
-        auto outputLayout = _output.GetMemoryLayout();
-        const auto numDimensions = _inputLayout.NumDimensions();
-        auto&& inputStride = _inputLayout.GetExtent();
-        auto&& inputOffset = _inputLayout.GetOffset();
-        auto&& inputSize = _inputLayout.GetActiveSize();
-        auto&& outputStride = outputLayout.GetExtent();
-        auto&& outputOffset = outputLayout.GetOffset();
-
-        function.For(inputSize[dimension], [dimension, numDimensions, inputOffset, inputStride, outputOffset, outputStride, prevInputDimensionOffset, prevOutputDimensionOffset, input1, input2, output, &compiler, this](emitters::IRFunctionEmitter& function, emitters::LLVMValue loopIndex) {
-            // Calculate the offset within this dimension = (loopIndex + offset[dimension])
-            emitters::LLVMValue thisInputDimensionInternalOffset = function.Operator(emitters::GetAddForValueType<int>(), loopIndex, function.Literal(inputOffset[dimension]));
-            emitters::LLVMValue thisOutputDimensionInternalOffset = function.Operator(emitters::GetAddForValueType<int>(), loopIndex, function.Literal(outputOffset[dimension]));
-
-            // Calculate the total offset from beginning of memory:
-            //   * if in the outermost loop, the offset into this dimension
-            //   * otherwise, the offset into this dimension plus the previous offset scaled by the previous dimension's stride
-            emitters::LLVMValue thisInputDimensionOffset = nullptr;
-            emitters::LLVMValue thisOutputDimensionOffset = nullptr;
-            if (dimension == 0)
-            {
-                assert(prevInputDimensionOffset == nullptr);
-                assert(prevOutputDimensionOffset == nullptr);
-                thisInputDimensionOffset = thisInputDimensionInternalOffset;
-                thisOutputDimensionOffset = thisOutputDimensionInternalOffset;
-            }
-            else
-            {
-                auto scaledInputDimensionOffset = function.Operator(emitters::GetMultiplyForValueType<int>(), prevInputDimensionOffset, function.Literal(inputStride[dimension]));
-                thisInputDimensionOffset = function.Operator(emitters::GetAddForValueType<int>(), scaledInputDimensionOffset, thisInputDimensionInternalOffset);
-
-                auto scaledOutputDimensionOffset = function.Operator(emitters::GetMultiplyForValueType<int>(), prevOutputDimensionOffset, function.Literal(outputStride[dimension]));
-                thisOutputDimensionOffset = function.Operator(emitters::GetAddForValueType<int>(), scaledOutputDimensionOffset, thisOutputDimensionInternalOffset);
-            }
-
-            if (static_cast<int>(dimension) < numDimensions - 1)
-            {
-                // Recursive call to emit nested loop
-                EmitComputeDimensionLoop(compiler, function, dimension + 1, input1, input2, output, thisInputDimensionOffset, thisOutputDimensionOffset);
-            }
-            else
-            {
-                // We're in the innermost loop --- compute the value
-                auto value1 = function.ValueAt(input1, thisInputDimensionOffset);
-                auto value2 = function.ValueAt(input2, thisInputDimensionOffset);
-                auto outputValue = _function.Compile(function, value1, value2);
-                function.SetValueAt(output, thisOutputDimensionOffset, outputValue);
-            }
-        });
-    }
-
-    template <typename ValueType, typename FunctionType>
-    void BinaryFunctionNode<ValueType, FunctionType>::Copy(model::ModelTransformer& transformer) const
-    {
-        auto outputLayout = _output.GetMemoryLayout();
-        const auto& portElements1 = transformer.GetCorrespondingInputs(_input1);
-        const auto& portElements2 = transformer.GetCorrespondingInputs(_input2);
-        auto newNode = transformer.AddNode<BinaryFunctionNode<ValueType, FunctionType>>(portElements1, portElements2, _inputLayout, outputLayout, _function, _paddingValue);
-        transformer.MapNodeOutput(output, newNode->output);
-    }
-
-    template <typename ValueType, typename FunctionType>
-    ell::utilities::ArchiveVersion BinaryFunctionNode<ValueType, FunctionType>::GetArchiveVersion() const
-    {
-        return { ell::utilities::ArchiveVersionNumbers::v8_port_memory_layout };
-    }
-
-    template <typename ValueType, typename FunctionType>
-    void BinaryFunctionNode<ValueType, FunctionType>::WriteToArchive(utilities::Archiver& archiver) const
-    {
-        model::CompilableNode::WriteToArchive(archiver);
-        archiver[defaultInput1PortName] << _input1;
-        archiver[defaultInput2PortName] << _input2;
-        archiver["paddingValue"] << _paddingValue;
-        archiver["inputLayout"] << _inputLayout;
-        archiver["outputLayout"] << _output.GetMemoryLayout();
-    }
-
-    template <typename ValueType, typename FunctionType>
-    void BinaryFunctionNode<ValueType, FunctionType>::ReadFromArchive(utilities::Unarchiver& archiver)
-    {
-        model::CompilableNode::ReadFromArchive(archiver);
-        archiver[defaultInput1PortName] >> _input1;
-        archiver[defaultInput2PortName] >> _input2;
-        archiver["paddingValue"] >> _paddingValue;
-        archiver["inputLayout"] >> _inputLayout;
-        model::PortMemoryLayout outputLayout;
-        archiver["outputLayout"] >> outputLayout;
-        _output.SetMemoryLayout(outputLayout);
-    }
-} // namespace nodes
-} // namespace ell
diff --git a/libraries/nodes/tcc/BinaryOperationNode.tcc b/libraries/nodes/tcc/BinaryOperationNode.tcc
deleted file mode 100644
index b7500153e..000000000
--- a/libraries/nodes/tcc/BinaryOperationNode.tcc
+++ /dev/null
@@ -1,516 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     BinaryOperationNode.tcc (nodes)
-//  Authors:  Chuck Jacobs
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#define ADD_TO_STRING_ENTRY(NAMESPACE, OPERATOR) \
-    case NAMESPACE::OPERATOR:                    \
-        return #OPERATOR;
-#define BEGIN_FROM_STRING if (false)
-#define ADD_FROM_STRING_ENTRY(NAMESPACE, OPERATOR) else if (name == #OPERATOR) return NAMESPACE::OPERATOR
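
[Editor's note: illustrative aside, not part of the diff.] These helper macros keep the
enum-to-string and string-to-enum tables in lockstep. Expanded by hand, one entry pair becomes:

    // ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, add) expands to:
    //     case emitters::BinaryOperationType::add:
    //         return "add";
    //
    // ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, add) expands to:
    //     else if (name == "add") return emitters::BinaryOperationType::add;
    //
    // BEGIN_FROM_STRING contributes the dead `if (false)` that lets every real
    // entry in from_string below be a uniform `else if`.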
-
-namespace ell
-{
-namespace nodes
-{
-    namespace BinaryOperations
-    {
-        inline std::string to_string(emitters::BinaryOperationType op)
-        {
-            switch (op)
-            {
-                ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, none);
-                ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, add);
-                ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, subtract);
-                ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, coordinatewiseMultiply);
-                ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, coordinatewiseDivide);
-                ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, logicalAnd);
-                ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, logicalOr);
-                ADD_TO_STRING_ENTRY(emitters::BinaryOperationType, logicalXor);
-            default:
-                throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown binary operation");
-            }
-        }
-
-        inline emitters::BinaryOperationType from_string(std::string name)
-        {
-            BEGIN_FROM_STRING;
-            ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, none);
-            ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, add);
-            ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, subtract);
-            ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, coordinatewiseMultiply);
-            ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, coordinatewiseDivide);
-            ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, logicalAnd);
-            ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, logicalOr);
-            ADD_FROM_STRING_ENTRY(emitters::BinaryOperationType, logicalXor);
-
-            throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown binary operation");
-        }
-
-        template <typename ValueType>
-        ValueType Add(ValueType a, ValueType b)
-        {
-            return a + b;
-        }
-
-        template <>
-        inline bool Add(bool a, bool b)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
-        }
-
-        template <typename ValueType>
-        ValueType Subtract(ValueType a, ValueType b)
-        {
-            return a - b;
-        }
-
-        template <>
-        inline bool Subtract(bool a, bool b)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
-        }
-
-        template <typename ValueType>
-        ValueType Multiply(ValueType a, ValueType b)
-        {
-            return a * b;
-        }
-
-        template <>
-        inline bool Multiply(bool a, bool b)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
-        }
-
-        template <typename ValueType>
-        ValueType Divide(ValueType a, ValueType b)
-        {
-            return a / b;
-        }
-
-        template <>
-        inline bool Divide(bool a, bool b)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
-        }
-
-        //
-        // Logical operations
-        //
-        template <typename ValueType>
-        ValueType LogicalAnd(ValueType a, ValueType b)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
-        }
-
-        template <>
-        inline bool LogicalAnd(bool a, bool b)
-        {
-            return a && b;
-        }
-
-        template <typename ValueType>
-        ValueType LogicalOr(ValueType a, ValueType b)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
-        }
-
-        template <>
-        inline bool LogicalOr(bool a, bool b)
-        {
-            return a || b;
-        }
-
-        template <typename ValueType>
-        ValueType LogicalXor(ValueType a, ValueType b)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch);
-        }
-
-        template <>
-        inline bool LogicalXor(bool a, bool b)
-        {
-            return (!a) != (!b);
-        }
-    } // namespace BinaryOperations
-
-    template <typename ValueType>
-    BinaryOperationNode<ValueType>::BinaryOperationNode() :
-        CompilableNode({ &_input1, &_input2 }, { &_output }),
-        _input1(this, {}, defaultInput1PortName),
-        _input2(this, {}, defaultInput2PortName),
-        _output(this, defaultOutputPortName, 0),
-        _operation(emitters::BinaryOperationType::none)
-    {
-    }
-
-    template <typename ValueType>
-    BinaryOperationNode<ValueType>::BinaryOperationNode(const model::OutputPort<ValueType>& input1, const model::OutputPort<ValueType>& input2, emitters::BinaryOperationType operation) :
-        CompilableNode({ &_input1, &_input2 }, { &_output }),
-        _input1(this, input1, defaultInput1PortName),
-        _inputLayout1(input1.GetMemoryLayout()),
-        _input2(this, input2, defaultInput2PortName),
-        _inputLayout2(input2.GetMemoryLayout()),
-        _output(this, defaultOutputPortName, input1.GetMemoryLayout()),
-        _operation(operation),
-        _paddingValue(0)
-    {
-        if (_inputLayout1.GetActiveSize() != _inputLayout2.GetActiveSize())
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Active areas must match for both inputs");
-        }
-    }
-
-    template <typename ValueType>
-    BinaryOperationNode<ValueType>::BinaryOperationNode(const model::OutputPort<ValueType>& input1,
-                                                        const model::OutputPort<ValueType>& input2,
-                                                        const model::PortMemoryLayout& layout,
-                                                        emitters::BinaryOperationType operation,
-                                                        ValueType padding) :
-        CompilableNode({ &_input1, &_input2 }, { &_output }),
-        _input1(this, input1, defaultInput1PortName),
-        _inputLayout1(layout),
-        _input2(this, input2, defaultInput2PortName),
-        _inputLayout2(layout),
-        _output(this, defaultOutputPortName, layout),
-        _operation(operation),
-        _paddingValue(padding)
-    {
-    }
-
-    template <typename ValueType>
-    BinaryOperationNode<ValueType>::BinaryOperationNode(const model::OutputPort<ValueType>& input1,
-                                                        const model::PortMemoryLayout& inputLayout1,
-                                                        const model::OutputPort<ValueType>& input2,
-                                                        const model::PortMemoryLayout& inputLayout2,
-                                                        const model::PortMemoryLayout& outputLayout,
-                                                        emitters::BinaryOperationType operation,
-                                                        ValueType padding) :
-        CompilableNode({ &_input1, &_input2 }, { &_output }),
-        _input1(this, input1, defaultInput1PortName),
-        _inputLayout1(inputLayout1),
-        _input2(this, input2, defaultInput2PortName),
-        _inputLayout2(inputLayout2),
-        _output(this, defaultOutputPortName, outputLayout),
-        _operation(operation),
-        _paddingValue(padding)
-    {
-        if (inputLayout1.GetActiveSize() != inputLayout2.GetActiveSize())
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Active areas must match for both inputs");
-        }
-        if (inputLayout1.GetActiveSize() != outputLayout.GetActiveSize())
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Input and output active areas must match");
-        }
-    }
-
-    template <typename ValueType>
-    template <typename Operation>
-    std::vector<ValueType> BinaryOperationNode<ValueType>::ComputeOutput(Operation&& function) const
-    {
-        auto outputLayout = _output.GetMemoryLayout();
-        auto outputSize = outputLayout.GetExtent().NumElements();
-        auto output = std::vector<ValueType>(outputSize);
-
-        const size_t prevInput1Offset = 0;
-        const size_t prevInput2Offset = 0;
-        const size_t prevOutputOffset = 0;
-        ComputeDimensionLoop(function, 0, output, prevInput1Offset, prevInput2Offset, prevOutputOffset);
-
-        return output;
-    }
-
-    template <typename ValueType>
-    void BinaryOperationNode<ValueType>::Compute() const
-    {
-        std::vector<ValueType> output;
-        switch (_operation)
-        {
-        case emitters::BinaryOperationType::add:
-            output = ComputeOutput(BinaryOperations::Add<ValueType>);
-            break;
-        case emitters::BinaryOperationType::subtract:
-            output = ComputeOutput(BinaryOperations::Subtract<ValueType>);
-            break;
-        case emitters::BinaryOperationType::coordinatewiseMultiply:
-            output = ComputeOutput(BinaryOperations::Multiply<ValueType>);
-            break;
-        case emitters::BinaryOperationType::coordinatewiseDivide:
-            output = ComputeOutput(BinaryOperations::Divide<ValueType>);
-            break;
-        case emitters::BinaryOperationType::logicalAnd:
-            output = ComputeOutput(BinaryOperations::LogicalAnd<ValueType>);
-            break;
-        case emitters::BinaryOperationType::logicalOr:
-            output = ComputeOutput(BinaryOperations::LogicalOr<ValueType>);
-            break;
-        case emitters::BinaryOperationType::logicalXor:
-            output = ComputeOutput(BinaryOperations::LogicalXor<ValueType>);
-            break;
-        default:
-            throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Unknown operation type");
-        }
-        _output.SetOutput(output);
-    };
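
[Editor's note: illustrative aside, not part of the diff.] Compute() picks one concrete
instantiation of a function template and hands it to the generic ComputeOutput as a callable,
so the dispatch happens once per call rather than once per element. The same shape in isolation
(all names illustrative):

    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    double Add(double a, double b) { return a + b; }
    double Multiply(double a, double b) { return a * b; }

    enum class Op { add, multiply };

    template <typename Operation>
    std::vector<double> ComputeOutput(Operation&& op, const std::vector<double>& a, const std::vector<double>& b)
    {
        std::vector<double> out(a.size());
        for (std::size_t i = 0; i < a.size(); ++i)
        {
            out[i] = op(a[i], b[i]); // elementwise application of the chosen operation
        }
        return out;
    }

    std::vector<double> Compute(Op operation, const std::vector<double>& a, const std::vector<double>& b)
    {
        switch (operation)
        {
        case Op::add:
            return ComputeOutput(Add, a, b);
        case Op::multiply:
            return ComputeOutput(Multiply, a, b);
        default:
            throw std::logic_error("Unknown operation type");
        }
    }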
-
-    template <typename ValueType>
-    void BinaryOperationNode<ValueType>::Copy(model::ModelTransformer& transformer) const
-    {
-        const auto& PortElements1 = transformer.GetCorrespondingInputs(_input1);
-        const auto& PortElements2 = transformer.GetCorrespondingInputs(_input2);
-        auto outputLayout = _output.GetMemoryLayout();
-        auto newNode = transformer.AddNode<BinaryOperationNode<ValueType>>(PortElements1, _inputLayout1, PortElements2, _inputLayout2, outputLayout, _operation);
-        transformer.MapNodeOutput(output, newNode->output);
-    }
-
-    template <typename ValueType>
-    void BinaryOperationNode<ValueType>::Compile(model::IRMapCompiler& compiler,
-                                                 emitters::IRFunctionEmitter& function)
-    {
-        if (_inputLayout1.GetMemorySize() == _inputLayout2.GetMemorySize())
-        {
-            if (!compiler.GetCompilerOptions().unrollLoops)
-            {
-                CompileLoop(compiler, function);
-            }
-            else
-            {
-                CompileExpanded(compiler, function);
-            }
-        }
-        else
-        {
-            emitters::LLVMValue pInput1 = compiler.EnsurePortEmitted(input1);
-            emitters::LLVMValue pInput2 = compiler.EnsurePortEmitted(input2);
-            emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output, _paddingValue);
-
-            // Call recursive function to emit nested loops
-            emitters::LLVMValue prevInput1DimensionOffset = nullptr;
-            emitters::LLVMValue prevInput2DimensionOffset = nullptr;
-            emitters::LLVMValue prevOutputDimensionOffset = nullptr;
-            EmitComputeDimensionLoop(compiler, function, 0, pInput1, pInput2, pResult, prevInput1DimensionOffset, prevInput2DimensionOffset, prevOutputDimensionOffset);
-        }
-    }
-
-    template <typename ValueType>
-    void BinaryOperationNode<ValueType>::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
-    {
-        emitters::LLVMValue pInput1 = compiler.EnsurePortEmitted(input1);
-        emitters::LLVMValue pInput2 = compiler.EnsurePortEmitted(input2);
-        emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output);
-
-        auto count = input1.Size();
-        function.VectorOperator(emitters::GetOperator<ValueType>(GetOperation()), count, pInput1, pInput2, [&pResult, &function](emitters::LLVMValue i, emitters::LLVMValue pValue) {
-            function.SetValueAt(pResult, i, pValue);
-        });
-    }
-
-    template <typename ValueType>
-    void BinaryOperationNode<ValueType>::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
-    {
-        emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output);
-
-        auto count = input1.Size();
-        for (size_t i = 0; i < count; ++i)
-        {
-            emitters::LLVMValue inputValue1 = compiler.LoadPortElementVariable(input1.GetInputElement(i));
-            emitters::LLVMValue inputValue2 = compiler.LoadPortElementVariable(input2.GetInputElement(i));
-            emitters::LLVMValue pOpResult = function.Operator(emitters::GetOperator<ValueType>(GetOperation()), inputValue1, inputValue2);
-            function.SetValueAt(pResult, function.Literal(i), pOpResult);
-        }
-    }
-
-    //
-    // Arbitrary-depth nested loops are generated recursively. The ComputeDimensionLoop
-    // function emits `numDimensions` nested loops of the form:
-    //
-    // for(iz = 0; iz < sz; ++iz)
-    // {
-    //     zOffset = (iz+offset[2]) * stride[2];
-    //     for(iy = 0; iy < sy; ++iy)
-    //     {
-    //         yOffset = zOffset + (iy+offset[1]) * stride[1];
-    //         for(ix = 0; ix < sx; ++ix)
-    //         {
-    //             offset = yOffset + (ix+offset[0]) * stride[0];
-    //             x = arr[offset];
-    //             val = f(x);
-    //             output[offset] = val;
-    //         }
-    //     }
-    // }
-    //
-
-    template <typename ValueType>
-    template <typename Operation>
-    void BinaryOperationNode<ValueType>::ComputeDimensionLoop(Operation& function,
-                                                              size_t dimension,
-                                                              std::vector<ValueType>& output,
-                                                              size_t prevInput1DimensionOffset,
-                                                              size_t prevInput2DimensionOffset,
-                                                              size_t prevOutputDimensionOffset) const
-    {
-        auto outputLayout = _output.GetMemoryLayout();
-        const auto numDimensions = _inputLayout1.NumDimensions();
-        auto&& inputStride1 = _inputLayout1.GetExtent();
-        auto&& inputOffset1 = _inputLayout1.GetOffset();
-        auto&& inputStride2 = _inputLayout2.GetExtent();
-        auto&& inputOffset2 = _inputLayout2.GetOffset();
-        auto&& inputSize = _inputLayout1.GetActiveSize();
-        auto&& outputOffset = outputLayout.GetOffset();
-        auto&& outputStride = outputLayout.GetExtent();
-
-        for (int loopIndex = 0; loopIndex < inputSize[dimension]; ++loopIndex)
-        {
-            // offset within start of this dimension = (loopIndex + offset[dimension])
-            auto thisInput1DimensionInternalOffset = loopIndex + inputOffset1[dimension];
-            auto thisInput2DimensionInternalOffset = loopIndex + inputOffset2[dimension];
-            auto thisOutputDimensionInternalOffset = loopIndex + outputOffset[dimension];
-
-            size_t thisInput1DimensionOffset = thisInput1DimensionInternalOffset;
-            size_t thisInput2DimensionOffset = thisInput2DimensionInternalOffset;
-            size_t thisOutputDimensionOffset = thisOutputDimensionInternalOffset;
-            if (dimension != 0)
-            {
-                thisInput1DimensionOffset += prevInput1DimensionOffset * inputStride1[dimension];
-                thisInput2DimensionOffset += prevInput2DimensionOffset * inputStride2[dimension];
-                thisOutputDimensionOffset += prevOutputDimensionOffset * outputStride[dimension];
-            }
-
-            if (static_cast<int>(dimension) < numDimensions - 1)
-            {
-                // Recursive call to emit nested loop
-                ComputeDimensionLoop(function, dimension + 1, output, thisInput1DimensionOffset, thisInput2DimensionOffset, thisOutputDimensionOffset);
-            }
-            else
-            {
-                // We're in the innermost loop --- compute the value
-                auto value1 = _input1[thisInput1DimensionOffset];
-                auto value2 = _input2[thisInput2DimensionOffset];
-                auto outputValue = function(value1, value2);
-                output[thisOutputDimensionOffset] = outputValue;
-            }
-        }
-    }
-
-    template <typename ValueType>
-    void BinaryOperationNode<ValueType>::EmitComputeDimensionLoop(model::IRMapCompiler& compiler,
-                                                                  emitters::IRFunctionEmitter& function,
-                                                                  size_t dimension,
-                                                                  emitters::LLVMValue input1,
-                                                                  emitters::LLVMValue input2,
-                                                                  emitters::LLVMValue output,
-                                                                  emitters::LLVMValue prevInput1DimensionOffset,
-                                                                  emitters::LLVMValue prevInput2DimensionOffset,
-                                                                  emitters::LLVMValue prevOutputDimensionOffset) const
-    {
-        auto outputLayout = _output.GetMemoryLayout();
-        const auto numDimensions = _inputLayout1.NumDimensions();
-        auto&& inputStride1 = _inputLayout1.GetExtent();
-        auto&& inputOffset1 = _inputLayout1.GetOffset();
-        auto&& inputStride2 = _inputLayout2.GetExtent();
-        auto&& inputOffset2 = _inputLayout2.GetOffset();
-        auto&& inputSize = _inputLayout1.GetActiveSize();
-        auto&& outputStride = outputLayout.GetExtent();
-        auto&& outputOffset = outputLayout.GetOffset();
-
-        function.For(inputSize[dimension], [input1, input2, output, inputOffset1, inputOffset2, inputStride1, inputStride2, outputStride, outputOffset, prevInput1DimensionOffset, prevInput2DimensionOffset, prevOutputDimensionOffset, dimension, numDimensions, &compiler, this](emitters::IRFunctionEmitter& function, emitters::LLVMValue loopIndex) {
-            // Calculate the offset within this dimension = (loopIndex + offset[dimension])
-            emitters::LLVMValue thisInput1DimensionInternalOffset = function.Operator(emitters::GetAddForValueType<int>(), loopIndex, function.Literal(inputOffset1[dimension]));
-            emitters::LLVMValue thisInput2DimensionInternalOffset = function.Operator(emitters::GetAddForValueType<int>(), loopIndex, function.Literal(inputOffset2[dimension]));
-            emitters::LLVMValue thisOutputDimensionInternalOffset = function.Operator(emitters::GetAddForValueType<int>(), loopIndex, function.Literal(outputOffset[dimension]));
-
-            // Calculate the total offset from beginning of memory:
-            //   * if in the outermost loop, the offset into this dimension
-            //   * otherwise, the offset into this dimension plus the previous offset scaled by the previous dimension's stride
-            emitters::LLVMValue thisInput1DimensionOffset = nullptr;
-            emitters::LLVMValue thisInput2DimensionOffset = nullptr;
-            emitters::LLVMValue thisOutputDimensionOffset = nullptr;
-            if (dimension == 0)
-            {
-                assert(prevInput1DimensionOffset == nullptr);
-                assert(prevInput2DimensionOffset == nullptr);
-                assert(prevOutputDimensionOffset == nullptr);
-                thisInput1DimensionOffset = thisInput1DimensionInternalOffset;
-                thisInput2DimensionOffset = thisInput2DimensionInternalOffset;
-                thisOutputDimensionOffset = thisOutputDimensionInternalOffset;
-            }
-            else
-            {
-                auto scaledInput1DimensionOffset = function.Operator(emitters::GetMultiplyForValueType<int>(), prevInput1DimensionOffset, function.Literal(inputStride1[dimension]));
-                auto scaledInput2DimensionOffset = function.Operator(emitters::GetMultiplyForValueType<int>(), prevInput2DimensionOffset, function.Literal(inputStride2[dimension]));
-                thisInput1DimensionOffset = function.Operator(emitters::GetAddForValueType<int>(), scaledInput1DimensionOffset, thisInput1DimensionInternalOffset);
-                thisInput2DimensionOffset = function.Operator(emitters::GetAddForValueType<int>(), scaledInput2DimensionOffset, thisInput2DimensionInternalOffset);
-
-                auto scaledOutputDimensionOffset = function.Operator(emitters::GetMultiplyForValueType<int>(), prevOutputDimensionOffset, function.Literal(outputStride[dimension]));
-                thisOutputDimensionOffset = function.Operator(emitters::GetAddForValueType<int>(), scaledOutputDimensionOffset, thisOutputDimensionInternalOffset);
-            }
-
-            if (static_cast<int>(dimension) < numDimensions - 1)
-            {
-                // Recursive call to emit nested loop
-                EmitComputeDimensionLoop(compiler, function, dimension + 1, input1, input2, output, thisInput1DimensionOffset, thisInput2DimensionOffset, thisOutputDimensionOffset);
-            }
-            else
-            {
-                // We're in the innermost loop --- compute the value
-                auto value1 = function.ValueAt(input1, thisInput1DimensionOffset);
-                auto value2 = function.ValueAt(input2, thisInput2DimensionOffset);
-                auto outputValue = function.Operator(emitters::GetOperator<ValueType>(GetOperation()), value1, value2);
-                function.SetValueAt(output, thisOutputDimensionOffset, outputValue);
-            }
-        });
-    }
-
-    template <typename ValueType>
-    utilities::ArchiveVersion BinaryOperationNode<ValueType>::GetArchiveVersion() const
-    {
-        constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v7_binary_operation_active_regions };
-
-        return archiveVersion;
-    }
-
-    template <typename ValueType>
-    bool BinaryOperationNode<ValueType>::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const
-    {
-        constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v7_binary_operation_active_regions };
-
-        return version >= archiveVersion;
-    }
-
-    template <typename ValueType>
-    void BinaryOperationNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
-    {
-        Node::WriteToArchive(archiver);
-        archiver[defaultInput1PortName] << _input1;
-        archiver[defaultInput2PortName] << _input2;
-        archiver["inputLayout1"] << _inputLayout1;
-        archiver["inputLayout2"] << _inputLayout2;
-        archiver["operation"] << BinaryOperations::to_string(_operation);
-        auto outputLayout = _output.GetMemoryLayout();
-        archiver["outputLayout"] << outputLayout;
-        archiver["padding"] << _paddingValue;
-    }
-
-    template <typename ValueType>
-    void BinaryOperationNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
-    {
-        Node::ReadFromArchive(archiver);
-        archiver[defaultInput1PortName] >> _input1;
-        archiver[defaultInput2PortName] >> _input2;
-        archiver["inputLayout1"] >> _inputLayout1;
-        archiver["inputLayout2"] >> _inputLayout2;
-        std::string operation;
-        archiver["operation"] >> operation;
-        _operation = BinaryOperations::from_string(operation);
-        model::PortMemoryLayout outputLayout;
-        archiver["outputLayout"] >> outputLayout;
-        _output.SetMemoryLayout(outputLayout);
-        archiver["padding"] >> _paddingValue;
-    }
-} // namespace nodes
-} // namespace ell
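
[Editor's note: illustrative aside, not part of the diff.] Two serialization details are worth
noting above: the operation is archived as its name (via to_string/from_string), so archives
survive renumbering of the enum, and CanReadArchiveVersion gates reads on the archive version
that introduced per-input active regions. A minimal sketch of that gate, with illustrative types:

    struct ArchiveVersion { int versionNumber; };
    constexpr bool operator>=(ArchiveVersion a, ArchiveVersion b) { return a.versionNumber >= b.versionNumber; }

    // Accept the introducing version and anything newer; reject older archives.
    bool CanReadArchiveVersion(ArchiveVersion version)
    {
        constexpr ArchiveVersion required{ 7 }; // stand-in for v7_binary_operation_active_regions
        return version >= required;
    }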
predicate"); - } - - template - bool Equal(ValueType a, ValueType b) - { - return a == b; - } - - template - bool Less(ValueType a, ValueType b) - { - return a < b; - } - - template - bool Greater(ValueType a, ValueType b) - { - return a > b; - } - - template - bool NotEqual(ValueType a, ValueType b) - { - return a != b; - } - - template - bool LessOrEqual(ValueType a, ValueType b) - { - return a <= b; - } - - template - bool GreaterOrEqual(ValueType a, ValueType b) - { - return a >= b; - } - } // namespace BinaryPredicates - - template - BinaryPredicateNode::BinaryPredicateNode() : - CompilableNode({ &_input1, &_input2 }, { &_output }), - _input1(this, {}, defaultInput1PortName), - _input2(this, {}, defaultInput2PortName), - _output(this, defaultOutputPortName, 0), - _predicate(emitters::BinaryPredicateType::none) - { - } - - template - BinaryPredicateNode::BinaryPredicateNode(const model::OutputPort& input1, const model::OutputPort& input2, emitters::BinaryPredicateType predicate) : - CompilableNode({ &_input1, &_input2 }, { &_output }), - _input1(this, input1, defaultInput1PortName), - _input2(this, input2, defaultInput2PortName), - _output(this, defaultOutputPortName, _input1.Size()), - _predicate(predicate) - { - if (input1.Size() != input2.Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Input sizes must match"); - } - assert(input1.Size() == input2.Size()); - } - - template - template - std::vector BinaryPredicateNode::ComputeOutput(Operation&& fn) const - { - auto output = std::vector(_input1.Size()); - for (size_t index = 0; index < _input1.Size(); index++) - { - output[index] = fn(_input1[index], _input2[index]); - } - return output; - } - - template - void BinaryPredicateNode::Compute() const - { - std::vector output; - switch (_predicate) - { - case emitters::BinaryPredicateType::equal: - output = ComputeOutput(BinaryPredicates::Equal); - break; - case emitters::BinaryPredicateType::less: - output = ComputeOutput(BinaryPredicates::Less); - break; - case emitters::BinaryPredicateType::greater: - output = ComputeOutput(BinaryPredicates::Greater); - break; - case emitters::BinaryPredicateType::notEqual: - output = ComputeOutput(BinaryPredicates::NotEqual); - break; - case emitters::BinaryPredicateType::lessOrEqual: - output = ComputeOutput(BinaryPredicates::LessOrEqual); - break; - case emitters::BinaryPredicateType::greaterOrEqual: - output = ComputeOutput(BinaryPredicates::GreaterOrEqual); - break; - default: - throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Unknown predicate type"); - } - _output.SetOutput(output); - }; - - template - void BinaryPredicateNode::Copy(model::ModelTransformer& transformer) const - { - const auto& portElements1 = transformer.GetCorrespondingInputs(_input1); - const auto& portElements2 = transformer.GetCorrespondingInputs(_input2); - auto newNode = transformer.AddNode>(portElements1, portElements2, _predicate); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void BinaryPredicateNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - if (!compiler.GetCompilerOptions().unrollLoops) - { - CompileLoop(compiler, function); - } - else - { - CompileExpanded(compiler, function); - } - } - - template - void BinaryPredicateNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pInput1 = compiler.EnsurePortEmitted(input1); - emitters::LLVMValue pInput2 = 
compiler.EnsurePortEmitted(input2); - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - emitters::TypedComparison cmp = emitters::GetComparison(GetPredicate()); - - function.For(input1.Size(), [pInput1, pInput2, pResult, cmp](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { - emitters::LLVMValue inputValue1 = function.ValueAt(pInput1, i); - emitters::LLVMValue inputValue2 = function.ValueAt(pInput2, i); - emitters::LLVMValue pOpResult = function.Comparison(cmp, inputValue1, inputValue2); - // LLVM internally uses 1 bit for boolean. We use integers to store boolean results. That requires a typecast in LLVM - function.SetValueAt(pResult, i, function.CastBoolToByte(pOpResult)); - }); - } - - template - void BinaryPredicateNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - - auto count = input1.Size(); - for (size_t i = 0; i < count; ++i) - { - emitters::LLVMValue inputValue1 = compiler.LoadPortElementVariable(input1.GetInputElement(i)); - emitters::LLVMValue inputValue2 = compiler.LoadPortElementVariable(input2.GetInputElement(i)); - emitters::LLVMValue pOpResult = function.Comparison(emitters::GetComparison(GetPredicate()), inputValue1, inputValue2); - function.SetValueAt(pResult, function.Literal((int)i), function.CastBoolToByte(pOpResult)); - } - } - - template - void BinaryPredicateNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInput1PortName] << _input1; - archiver[defaultInput2PortName] << _input2; - archiver["predicate"] << BinaryPredicates::to_string(_predicate); - } - - template - void BinaryPredicateNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInput1PortName] >> _input1; - archiver[defaultInput2PortName] >> _input2; - std::string predicate; - archiver["predicate"] >> predicate; - _predicate = BinaryPredicates::from_string(predicate); - _output.SetSize(_input1.Size()); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/BroadcastFunctionNode.tcc b/libraries/nodes/tcc/BroadcastFunctionNode.tcc deleted file mode 100644 index 0c6978ede..000000000 --- a/libraries/nodes/tcc/BroadcastFunctionNode.tcc +++ /dev/null @@ -1,772 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: BroadcastFunctionNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - // - // BroadcastUnaryFunction - // - template - ValueType BroadcastUnaryFunction::Compute(ValueType x, const std::vector& secondaryArgs) const - { - assert(secondaryArgs.size() == 0); - return Compute(x); - } - - template - emitters::LLVMValue BroadcastUnaryFunction::Compile(emitters::IRFunctionEmitter& function, emitters::LLVMValue x, const std::vector& secondaryArgs) const - { - assert(secondaryArgs.size() == 0); - return this->Compile(function, x); - } - - // - // BroadcastBinaryFunction - // - template - ValueType BroadcastBinaryFunction::Compute(ValueType x, const std::vector& secondaryArgs) const - { - assert(secondaryArgs.size() == 1); - return Compute(x, secondaryArgs[0]); - } - - template - emitters::LLVMValue BroadcastBinaryFunction::Compile(emitters::IRFunctionEmitter& function, 
emitters::LLVMValue x, const std::vector& secondaryArgs) const - { - assert(secondaryArgs.size() == 1); - return this->Compile(function, x, secondaryArgs[0]); - } - - // - // BroadcastTernaryFunction - // - template - ValueType BroadcastTernaryFunction::Compute(ValueType x, const std::vector& secondaryArgs) const - { - assert(secondaryArgs.size() == 2); - return Compute(x, secondaryArgs[0], secondaryArgs[1]); - } - - template - emitters::LLVMValue BroadcastTernaryFunction::Compile(emitters::IRFunctionEmitter& function, emitters::LLVMValue x, const std::vector& secondaryArgs) const - { - assert(secondaryArgs.size() == 2); - return this->Compile(function, x, secondaryArgs[0], secondaryArgs[1]); - } - - // - // BroadcastLinearFunction - // - template - ValueType BroadcastLinearFunction::Compute(ValueType x, ValueType scale, ValueType bias) const - { - return scale * x + bias; - } - - template - emitters::LLVMValue BroadcastLinearFunction::Compile(emitters::IRFunctionEmitter& function, emitters::LLVMValue x, emitters::LLVMValue scale, emitters::LLVMValue bias) const - { - if (scale == nullptr) // bias only - { - return function.Operator(emitters::GetAddForValueType(), x, bias); - } - else if (bias == nullptr) // scale only - { - return function.Operator(emitters::GetMultiplyForValueType(), scale, x); - } - else - { - return function.Operator(emitters::GetAddForValueType(), function.Operator(emitters::GetMultiplyForValueType(), scale, x), bias); - } - } - - // - // BroadcastFunctionNode - // - - template - BroadcastFunctionNode::BroadcastFunctionNode(const std::vector& inputs, const std::vector& outputs) : - CompilableNode(inputs, outputs), - _paddingValue(0) - { - } - - template - BroadcastFunctionNode::BroadcastFunctionNode(const std::vector& inputs, - const model::PortMemoryLayout& inputLayout, - size_t broadcastDimension, - const std::vector& outputs, - const model::PortMemoryLayout& outputLayout, - FunctionType function, - ValueType paddingValue) : - CompilableNode(inputs, outputs), - _inputLayout(inputLayout), - _broadcastDimension(broadcastDimension), - _function(function), - _paddingValue(paddingValue) - { - } - - template - model::PortMemoryLayout BroadcastFunctionNode::GetOutputMemoryLayout() const - { - return GetOutputPort(0)->GetMemoryLayout(); - } - - // - // Arbitrary-depth nested loops are generated recursively. The EmitComputeDimensionLoop - // function emits `numDimensions` nested loops of the form: - // - // for(iz = 0; iz < sz; ++iz) - // { - // zOffset = (iz+offset[2]) * stride[2]; - // for(iy = 0; iy < sy; ++iy) - // { - // yOffset = zOffset + (iy+offset[1]) * stride[1]; - // for(ix = 0; ix < sx; ++ix) - // { - // offset = yOffset + (ix+offset[0]) * stride[0]; - // x = arr[offset]; - // val = f(x); - // output[offset] = val; - // } - // } - // } - // - - // Note: secondaryValues is passed by non-const reference to avoid copies. It doesn't function as an output parameter. 
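
[Editor's note: illustrative aside, not part of the diff.] BroadcastLinearFunction::Compile
treats a null scale or bias as "coefficient absent" and emits only the surviving operation,
so a bias-only node costs one add and a scale-only node one multiply. A plain-C++ analogue of
the three cases, with nullptr standing in for a null LLVMValue:

    // Illustrative: optional coefficients passed as pointers, nullptr == not specified.
    double LinearCompute(double x, const double* scale, const double* bias)
    {
        if (scale == nullptr) // bias only: x + b
        {
            return x + *bias;
        }
        else if (bias == nullptr) // scale only: s * x
        {
            return *scale * x;
        }
        else // full linear function: s * x + b
        {
            return *scale * x + *bias;
        }
    }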
-
-    //
-    // BroadcastFunctionNode
-    //
-
-    template <typename ValueType, typename FunctionType>
-    BroadcastFunctionNode<ValueType, FunctionType>::BroadcastFunctionNode(const std::vector<model::InputPortBase*>& inputs, const std::vector<model::OutputPortBase*>& outputs) :
-        CompilableNode(inputs, outputs),
-        _paddingValue(0)
-    {
-    }
-
-    template <typename ValueType, typename FunctionType>
-    BroadcastFunctionNode<ValueType, FunctionType>::BroadcastFunctionNode(const std::vector<model::InputPortBase*>& inputs,
-                                                                          const model::PortMemoryLayout& inputLayout,
-                                                                          size_t broadcastDimension,
-                                                                          const std::vector<model::OutputPortBase*>& outputs,
-                                                                          const model::PortMemoryLayout& outputLayout,
-                                                                          FunctionType function,
-                                                                          ValueType paddingValue) :
-        CompilableNode(inputs, outputs),
-        _inputLayout(inputLayout),
-        _broadcastDimension(broadcastDimension),
-        _function(function),
-        _paddingValue(paddingValue)
-    {
-    }
-
-    template <typename ValueType, typename FunctionType>
-    model::PortMemoryLayout BroadcastFunctionNode<ValueType, FunctionType>::GetOutputMemoryLayout() const
-    {
-        return GetOutputPort(0)->GetMemoryLayout();
-    }
-
-    //
-    // Arbitrary-depth nested loops are generated recursively. The EmitComputeDimensionLoop
-    // function emits `numDimensions` nested loops of the form:
-    //
-    // for(iz = 0; iz < sz; ++iz)
-    // {
-    //     zOffset = (iz+offset[2]) * stride[2];
-    //     for(iy = 0; iy < sy; ++iy)
-    //     {
-    //         yOffset = zOffset + (iy+offset[1]) * stride[1];
-    //         for(ix = 0; ix < sx; ++ix)
-    //         {
-    //             offset = yOffset + (ix+offset[0]) * stride[0];
-    //             x = arr[offset];
-    //             val = f(x);
-    //             output[offset] = val;
-    //         }
-    //     }
-    // }
-    //
-
-    // Note: secondaryValues is passed by non-const reference to avoid copies. It doesn't function as an output parameter.
-    template <typename ValueType, typename FunctionType>
-    void BroadcastFunctionNode<ValueType, FunctionType>::ComputeDimensionLoop(size_t dimension, std::vector<ValueType>& output, size_t prevInputDimensionOffset, size_t prevOutputDimensionOffset, std::vector<ValueType>& secondaryValues) const
-    {
-        // Note: It should be easy to unroll the last K levels by putting a real loop here when dimension < k
-        // Or, instead of unrolling, vectorizing: if broadcastDimension = 1, let secondaryValue be a vector and load it one loop previous
-        // If broadcastDimension = outermost dimension (0), we may want to parallelize over that dimension
-        const auto numDimensions = NumPrimaryInputDimensions();
-        auto&& inputLayout = GetInputMemoryLayout();
-        auto&& inputStride = inputLayout.GetExtent();
-        auto&& inputOffset = inputLayout.GetOffset();
-        auto&& inputSize = inputLayout.GetActiveSize();
-        auto&& outputLayout = GetOutputMemoryLayout();
-        auto&& outputStride = outputLayout.GetExtent();
-        auto&& outputOffset = outputLayout.GetOffset();
-        auto&& primaryInput = GetPrimaryInput();
-        const auto broadcastDimension = GetBroadcastDimension();
-        const auto numSecondaryInputs = NumSecondaryInputs();
-
-        for (int loopIndex = 0; loopIndex < inputSize[dimension]; ++loopIndex)
-        {
-            // offset within start of this dimension = (loopIndex + offset[dimension])
-            auto thisInputDimensionInternalOffset = loopIndex + inputOffset[dimension];
-            auto thisOutputDimensionInternalOffset = loopIndex + outputOffset[dimension];
-
-            size_t thisInputDimensionOffset = thisInputDimensionInternalOffset;
-            size_t thisOutputDimensionOffset = thisOutputDimensionInternalOffset;
-            if (dimension != 0)
-            {
-                thisInputDimensionOffset += prevInputDimensionOffset * inputStride[dimension];
-                thisOutputDimensionOffset += prevOutputDimensionOffset * outputStride[dimension];
-            }
-
-            if (dimension == broadcastDimension)
-            {
-                for (int index = 0; index < numSecondaryInputs; ++index)
-                {
-                    auto&& secondaryInput = GetSecondaryInput(index);
-                    if (IsSecondaryInputPresent(index))
-                    {
-                        secondaryValues[index] = (*secondaryInput)[loopIndex];
-                    }
-                    else
-                    {
-                        // Dubious hack to deal with linear function nodes missing a coefficient
-                        if (std::is_same<FunctionType, BroadcastLinearFunction<ValueType>>::value && index == 0) // "scale" value, which should be 1 if not specified
-                        {
-                            secondaryValues[index] = static_cast<ValueType>(1.0);
-                        }
-                        else
-                        {
-                            secondaryValues[index] = 0;
-                        }
-                    }
-                }
-            }
-
-            if (dimension < numDimensions - 1)
-            {
-                // Recursive call to emit nested loop
-                ComputeDimensionLoop(dimension + 1, output, thisInputDimensionOffset, thisOutputDimensionOffset, secondaryValues);
-            }
-            else
-            {
-                // We're in the innermost loop --- compute the value
-                auto primaryValue = primaryInput[thisInputDimensionOffset];
-                auto outputValue = GetFunction().Compute(primaryValue, secondaryValues);
-                output[thisOutputDimensionOffset] = outputValue;
-            }
-        }
-    }
-
-    // wrapper around EmitComputeDimensionLoop for use by parallel tasks
-    template <typename ValueType, typename FunctionType>
-    emitters::IRFunctionEmitter BroadcastFunctionNode<ValueType, FunctionType>::GetTaskFunction(model::IRMapCompiler& compiler,
-                                                                                                emitters::IRFunctionEmitter& function,
-                                                                                                const emitters::LLVMTypeList& portTypes) const
-    {
-        auto& module = function.GetModule();
-        auto& emitter = module.GetIREmitter();
-        auto& context = module.GetLLVMContext();
-        auto int32Type = emitter.Type(emitters::VariableType::Int32);
-        auto voidType = llvm::Type::getVoidTy(context);
-
-        // ASSUME dimension == 0 --- we're only parallelizing on the outermost loop
-        int dimension = 0;
-
-        emitters::LLVMTypeList argTypes = portTypes;
-        // int numValuePorts = 2 + NumSecondaryInputs(); // primary input, secondary inputs, output
-        // argTypes.insert(argTypes.end(), numValuePorts, valuePtrType);
-        argTypes.insert(argTypes.end(), 2, int32Type); // begin, end
-
-        auto taskFunction = function.GetModule().BeginFunction(utilities::to_string(GetId()) + "_task", voidType, argTypes);
-        {
-            // get stuff from arguments
-            auto arguments = taskFunction.Arguments().begin();
-            auto primaryInput = &(*arguments++);
-            std::vector<emitters::LLVMValue> secondaryInputs;
-            std::vector<emitters::LLVMValue> secondaryValues;
-            for (int index = 0; index < NumSecondaryInputs(); ++index)
-            {
-                auto secondaryInput = &(*arguments++);
-                // if we really have an input, push it, else push a nullptr (note: we know this at compile-time)
-                if (IsSecondaryInputPresent(index))
-                {
-                    secondaryInputs.push_back(secondaryInput);
-                }
-                else
-                {
-                    secondaryInputs.push_back(nullptr);
-                }
-                secondaryValues.push_back(nullptr);
-            }
-            auto output = &(*arguments++);
-            auto begin = function.LocalScalar(&(*arguments++));
-            auto end = function.LocalScalar(&(*arguments++));
-            auto prevInputDimensionOffset = function.LocalScalar();
-            auto prevOutputDimensionOffset = function.LocalScalar();
-
-            EmitComputeDimensionLoop(compiler, taskFunction, dimension, begin, end, primaryInput, secondaryInputs, output, prevInputDimensionOffset, prevOutputDimensionOffset, secondaryValues);
-            taskFunction.Return();
-        }
-        function.GetModule().EndFunction();
-
-        return taskFunction;
-    }
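
[Editor's note: illustrative aside, not part of the diff.] The emitted task function's last two
parameters form a half-open [begin, end) range over the outermost dimension, which is what lets
the same loop nest be run whole (0..size) or sliced across threads. The shape of that contract
in plain C++ (all names illustrative):

    // One task processes rows [begin, end); launching several of these over disjoint
    // ranges mirrors what StartTasks/WaitAll do with the emitted task function.
    void RunTask(const double* input, double* output, int begin, int end, int rowSize)
    {
        for (int row = begin; row < end; ++row)
        {
            for (int col = 0; col < rowSize; ++col)
            {
                output[row * rowSize + col] = 2 * input[row * rowSize + col]; // stand-in loop body
            }
        }
    }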
-
-    // Note: secondaryValues is passed by non-const reference to avoid copies. It doesn't function as an output parameter.
-    template <typename ValueType, typename FunctionType>
-    void BroadcastFunctionNode<ValueType, FunctionType>::EmitComputeDimensionLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, size_t dimension, emitters::IRLocalScalar begin, emitters::IRLocalScalar end, emitters::LLVMValue primaryInput, const std::vector<emitters::LLVMValue>& secondaryInputs, emitters::LLVMValue output, emitters::IRLocalScalar prevInputDimensionOffset, emitters::IRLocalScalar prevOutputDimensionOffset, std::vector<emitters::LLVMValue>& secondaryValues) const
-    {
-        // Note: It should be easy to unroll the last K levels by putting a real loop here when dimension < k
-        // Or, instead of unrolling, vectorizing --- if broadcastDimension = 1, let secondaryValue be a vector and load it one loop previous
-        // If broadcastDimension = outermost dimension (0), we may want to parallelize over that dimension
-        const auto numDimensions = NumPrimaryInputDimensions();
-        auto&& inputLayout = GetInputMemoryLayout();
-        auto&& inputStride = inputLayout.GetExtent();
-        auto&& inputOffset = inputLayout.GetOffset();
-        auto&& inputSize = inputLayout.GetActiveSize();
-        auto&& outputLayout = GetOutputMemoryLayout();
-        auto&& outputStride = outputLayout.GetExtent();
-        auto&& outputOffset = outputLayout.GetOffset();
-        const auto broadcastDimension = GetBroadcastDimension();
-        const auto numSecondaryInputs = NumSecondaryInputs();
-
-        function.For(begin, end, [dimension, numDimensions, inputSize, inputOffset, inputStride, outputOffset, outputStride, broadcastDimension, numSecondaryInputs, prevInputDimensionOffset, prevOutputDimensionOffset, primaryInput, secondaryInputs, output, &secondaryValues, &compiler, this](emitters::IRFunctionEmitter& function, auto loopIndex) {
-            // Calculate the offset within this dimension = (loopIndex + offset[dimension])
-            auto thisInputDimensionInternalOffset = loopIndex + inputOffset[dimension];
-            auto thisOutputDimensionInternalOffset = loopIndex + outputOffset[dimension];
-
-            // Calculate the total offset from beginning of memory:
-            //   * if in the outermost loop, the offset into this dimension
-            //   * otherwise, the offset into this dimension plus the previous offset scaled by the previous dimension's stride
-            auto thisInputDimensionOffset = function.LocalScalar();
-            auto thisOutputDimensionOffset = function.LocalScalar();
-            if (dimension == 0)
-            {
-                assert(!prevInputDimensionOffset.IsValid());
-                assert(!prevOutputDimensionOffset.IsValid());
-                thisInputDimensionOffset = thisInputDimensionInternalOffset;
-                thisOutputDimensionOffset = thisOutputDimensionInternalOffset;
-            }
-            else
-            {
-                thisInputDimensionOffset = thisInputDimensionInternalOffset + (prevInputDimensionOffset * inputStride[dimension]);
-                thisOutputDimensionOffset = thisOutputDimensionInternalOffset + (prevOutputDimensionOffset * outputStride[dimension]);
-            }
-
-            if (dimension == broadcastDimension)
-            {
-                for (int index = 0; index < numSecondaryInputs; ++index)
-                {
-                    auto&& secondaryInput = secondaryInputs[index];
-                    secondaryValues[index] = this->IsSecondaryInputPresent(index) ? function.ValueAt(secondaryInput, loopIndex) : nullptr;
-                }
-            }
-
-            if (dimension < numDimensions - 1)
-            {
-                // Recursive call to emit nested loop
-                auto nextBegin = function.LocalScalar(0);
-                auto nextEnd = function.LocalScalar(inputSize[dimension + 1]);
-                this->EmitComputeDimensionLoop(compiler, function, dimension + 1, nextBegin, nextEnd, primaryInput, secondaryInputs, output, thisInputDimensionOffset, thisOutputDimensionOffset, secondaryValues);
-            }
-            else
-            {
-                // We're in the innermost loop --- compute the value
-                auto primaryValue = function.ValueAt(primaryInput, thisInputDimensionOffset);
-                auto outputValue = this->GetFunction().Compile(function, primaryValue, secondaryValues);
-                function.SetValueAt(output, thisOutputDimensionOffset, outputValue);
-            }
-        });
-    }
-
-    template <typename ValueType, typename FunctionType>
-    bool BroadcastFunctionNode<ValueType, FunctionType>::IsSecondaryInputPresent(int index) const
-    {
-        auto secondaryInput = GetSecondaryInput(index);
-        if (secondaryInput)
-        {
-            return secondaryInput->Size() > 0;
-        }
-        else
-        {
-            return false;
-        }
-    }
-
-    template <typename ValueType, typename FunctionType>
-    void BroadcastFunctionNode<ValueType, FunctionType>::Compute() const
-    {
-        auto outputSize = GetOutputMemoryLayout().GetExtent().NumElements();
-        auto output = std::vector<ValueType>(outputSize);
-
-        const size_t prevInputOffset = 0;
-        const size_t prevOutputOffset = 0;
-        std::vector<ValueType> secondaryValues(NumSecondaryInputs(), static_cast<ValueType>(0));
-        ComputeDimensionLoop(0, output, prevInputOffset, prevOutputOffset, secondaryValues);
-
-        GetOutput().SetOutput(output);
-    }
-
-    template <typename ValueType, typename FunctionType>
-    void BroadcastFunctionNode<ValueType, FunctionType>::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
-    {
-        const auto& compilerSettings = compiler.GetCompilerOptions();
-
-        auto& module = function.GetModule();
-        auto& emitter = module.GetIREmitter();
-        auto valueType = emitter.Type(emitters::GetVariableType<ValueType>());
-        auto valuePtrType = valueType->getPointerTo();
-
-        const auto& primaryInput = GetPrimaryInput();
-        auto primaryInputSize = primaryInput.Size();
-        auto&& inputLayout = GetInputMemoryLayout();
-        auto&& inputSize = inputLayout.GetActiveSize();
-        auto secondaryInputSize = GetSecondaryInputSize();
-        DEBUG_USED(secondaryInputSize);
-        assert(secondaryInputSize == 0 || primaryInputSize % secondaryInputSize == 0);
-
-        emitters::LLVMValue pPrimaryInput = compiler.EnsurePortEmitted(primaryInput);
-        std::vector<emitters::LLVMValue> secondaryInputs;
-        std::vector<emitters::LLVMValue> secondaryValues;
-        for (int index = 0; index < NumSecondaryInputs(); ++index)
-        {
-            auto secondaryInputPort = GetSecondaryInput(index);
-            auto secondaryInputSize = secondaryInputPort->Size();
-            emitters::LLVMValue secondaryInput = (secondaryInputSize > 0) ? compiler.EnsurePortEmitted(*secondaryInputPort) : function.NullPointer(valuePtrType);
-            secondaryInputs.push_back(secondaryInput);
-            secondaryValues.push_back(nullptr);
-        }
-        emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(GetOutput(), this->GetOutputPadding());
-
-        // Call recursive function to emit nested loops
-        // Note: We could just offset the input pointer at beginning instead of adding offset every time through the loop
-        // Note: We can potentially fuse adjacent loops if memory is contiguous --- it can be done by preprocessing size/stride vectors
-        bool allSecondaryInputsValid = true;
-        for (int index = 0; index < NumSecondaryInputs(); ++index)
-        {
-            if (!IsSecondaryInputPresent(index))
-            {
-                allSecondaryInputsValid = false;
-            }
-        }
-
-        const int minimumTaskSize = 4000;
-        if (compilerSettings.parallelize && allSecondaryInputsValid && primaryInputSize > 2 * minimumTaskSize)
-        {
-            // computes ceil(a/b)
-            auto CeilDiv = [](int a, int b) {
-                return (a - 1) / b + 1;
-            };
-
-            // TODO: fix up logic for deciding how many tasks to use.
-            // want to specify minimum amount of work per task, and create fewer tasks
-            // if we don't have enough work.
-            auto numOuterIterations = inputSize[0];
-            const int numDesiredTasks = compilerSettings.maxThreads;
-            int taskSize = std::max(CeilDiv(primaryInputSize, numDesiredTasks), minimumTaskSize);
-            const int numTasks = std::min(CeilDiv(primaryInputSize, taskSize), compilerSettings.maxThreads);
-            taskSize = CeilDiv(numOuterIterations, numTasks);
-
-            // Ugly type-getting code to get around the type of the emitted port variables being different depending
-            // on whether the node is inlined (or something).
-            emitters::LLVMTypeList taskFunctionArgTypes{ pPrimaryInput->getType() };
-            for (auto& secondaryInput : secondaryInputs)
-            {
-                taskFunctionArgTypes.push_back(secondaryInput->getType());
-            }
-            taskFunctionArgTypes.push_back(pOutput->getType());
-
-            auto taskFunction = this->GetTaskFunction(compiler, function, taskFunctionArgTypes);
-            std::vector<std::vector<emitters::LLVMValue>> taskArgs;
-            for (int taskIndex = 0; taskIndex < numTasks; ++taskIndex)
-            {
-                auto begin = function.Literal(taskIndex * taskSize);
-                auto end = function.Literal(std::min((taskIndex + 1) * taskSize, numOuterIterations));
-
-                std::vector<emitters::LLVMValue> args{ pPrimaryInput };
-                args.insert(args.end(), secondaryInputs.begin(), secondaryInputs.end());
-                args.insert(args.end(), { pOutput, begin, end });
-                taskArgs.push_back(args);
-            }
-            auto tasks = function.StartTasks(taskFunction, taskArgs);
-            tasks.WaitAll(function);
-        }
-        else
-        {
-            auto prevInputDimensionOffset = function.LocalScalar();
-            auto prevOutputDimensionOffset = function.LocalScalar();
-            auto begin = function.LocalScalar(0);
-            auto end = function.LocalScalar(inputSize[0]);
-            EmitComputeDimensionLoop(compiler, function, 0, begin, end, pPrimaryInput, secondaryInputs, pOutput, prevInputDimensionOffset, prevOutputDimensionOffset, secondaryValues);
-        }
-    }
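
[Editor's note: illustrative aside, not part of the diff.] Worked numbers for the task-sizing
arithmetic above, under assumed settings maxThreads = 4 and minimumTaskSize = 4000:

    // CeilDiv(a, b) == ceil(a / b) for positive ints.
    constexpr int CeilDiv(int a, int b) { return (a - 1) / b + 1; }

    // primaryInputSize = 100000 elements, numOuterIterations = 100 rows:
    //   taskSize = max(CeilDiv(100000, 4), 4000) = max(25000, 4000) = 25000 elements
    //   numTasks = min(CeilDiv(100000, 25000), 4) = min(4, 4)       = 4 tasks
    //   taskSize = CeilDiv(100, 4)                                  = 25 rows per task
    static_assert(CeilDiv(100000, 4) == 25000, "");
    static_assert(CeilDiv(100, 4) == 25, "");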
GetOutputPorts(); - for (auto p : outputs) - { - p->SetMemoryLayout(outputLayout); - } - archiver["broadcastDimension"] >> _broadcastDimension; - archiver["paddingValue"] >> _paddingValue; - } - - // - // BroadcastUnaryFunctionNode - // - template - BroadcastUnaryFunctionNode::BroadcastUnaryFunctionNode() : - BroadcastFunctionNode({ &_primaryInput }, { &_output }), - _primaryInput(this, {}, primaryInputPortName), - _output(this, ell::model::Node::defaultOutputPortName, 0) - { - } - - template - BroadcastUnaryFunctionNode::BroadcastUnaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::PortMemoryLayout& outputLayout, ValueType paddingValue) : - BroadcastUnaryFunctionNode(primaryInput, inputLayout, outputLayout, FunctionType{}, paddingValue) - { - } - - template - BroadcastUnaryFunctionNode::BroadcastUnaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::PortMemoryLayout& outputLayout, FunctionType function, ValueType paddingValue) : - BroadcastFunctionNode({ &_primaryInput }, inputLayout, 0, { &_output }, outputLayout, function, paddingValue), - _primaryInput(this, primaryInput, primaryInputPortName), - _output(this, ell::model::Node::defaultOutputPortName, outputLayout) - { - // Verify sizes are compatible - size_t totalInputSize = inputLayout.GetMemorySize(); - if (primaryInput.Size() < totalInputSize) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Primary input too small"); - } - } - - template - void BroadcastUnaryFunctionNode::Copy(model::ModelTransformer& transformer) const - { - const auto& primaryInputElements = transformer.GetCorrespondingInputs(_primaryInput); - auto broadcastFunction = GetFunction(); - auto newNode = transformer.AddNode>(primaryInputElements, - this->GetInputMemoryLayout(), - this->GetOutputMemoryLayout(), - broadcastFunction); - transformer.MapNodeOutput(output, newNode->output); - } - - template - utilities::ArchiveVersion BroadcastUnaryFunctionNode::GetArchiveVersion() const - { - constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v5_refined_nodes }; - - return archiveVersion; - } - - template - bool BroadcastUnaryFunctionNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const - { - constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v5_refined_nodes }; - - return version >= archiveVersion; - } - - template - void BroadcastUnaryFunctionNode::WriteToArchive(utilities::Archiver& archiver) const - { - BroadcastFunctionNode::WriteToArchive(archiver); - archiver[primaryInputPortName] << _primaryInput; - } - - template - void BroadcastUnaryFunctionNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - BroadcastFunctionNode::ReadFromArchive(archiver); - archiver[primaryInputPortName] >> _primaryInput; - } - - template - const model::InputPort* BroadcastUnaryFunctionNode::GetSecondaryInput(int index) const - { - assert(index == 0); - return nullptr; - } - - // - // BroadcastBinaryFunctionNode - // - template - BroadcastBinaryFunctionNode::BroadcastBinaryFunctionNode() : - BroadcastFunctionNode({ &_primaryInput, &_secondaryInput }, { &_output }), - _primaryInput(this, {}, primaryInputPortName), - _secondaryInput(this, {}, secondaryInputPortName), - _output(this, ell::model::Node::defaultOutputPortName, 0) - { - } - - template - BroadcastBinaryFunctionNode::BroadcastBinaryFunctionNode(const model::OutputPort& 
primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& secondaryInput, size_t dimension, const model::PortMemoryLayout& outputLayout, ValueType paddingValue) : - BroadcastBinaryFunctionNode(primaryInput, inputLayout, secondaryInput, dimension, outputLayout, FunctionType{}, paddingValue) - { - } - - template - BroadcastBinaryFunctionNode::BroadcastBinaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& secondaryInput, size_t dimension, const model::PortMemoryLayout& outputLayout, FunctionType function, ValueType paddingValue) : - BroadcastFunctionNode({ &_primaryInput, &_secondaryInput }, inputLayout, dimension, { &_output }, outputLayout, function, paddingValue), - _primaryInput(this, primaryInput, primaryInputPortName), - _secondaryInput(this, secondaryInput, secondaryInputPortName), - _output(this, ell::model::Node::defaultOutputPortName, outputLayout) - { - // Verify sizes are compatible - size_t totalInputSize = inputLayout.GetMemorySize(); - if (primaryInput.Size() < totalInputSize) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Primary input too small"); - } - - if (secondaryInput.Size() != inputLayout.GetActiveSize(dimension)) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Broadcast vector size doesn't match input"); - } - } - - template - void BroadcastBinaryFunctionNode::Copy(model::ModelTransformer& transformer) const - { - const auto& primaryInputElements = transformer.GetCorrespondingInputs(_primaryInput); - const auto& secondaryInputElements = transformer.GetCorrespondingInputs(_secondaryInput); - auto newNode = transformer.AddNode>(primaryInputElements, - this->GetInputMemoryLayout(), - secondaryInputElements, - this->GetBroadcastDimension(), - this->GetOutputMemoryLayout(), - GetFunction()); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void BroadcastBinaryFunctionNode::WriteToArchive(utilities::Archiver& archiver) const - { - BroadcastFunctionNode::WriteToArchive(archiver); - archiver[primaryInputPortName] << _primaryInput; - archiver[secondaryInputPortName] << _secondaryInput; - } - - template - void BroadcastBinaryFunctionNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - BroadcastFunctionNode::ReadFromArchive(archiver); - archiver[primaryInputPortName] >> _primaryInput; - archiver[secondaryInputPortName] >> _secondaryInput; - } - - template - const model::InputPort* BroadcastBinaryFunctionNode::GetSecondaryInput(int index) const - { - assert(index == 0); - return &_secondaryInput; - } - - // - // BroadcastTernaryFunctionNode - // - template - BroadcastTernaryFunctionNode::BroadcastTernaryFunctionNode() : - BroadcastFunctionNode({ &_primaryInput, &_secondaryInput1, &_secondaryInput2 }, { &_output }), - _primaryInput(this, {}, primaryInputPortName), - _secondaryInput1(this, {}, secondaryInput1PortName), - _secondaryInput2(this, {}, secondaryInput2PortName), - _output(this, ell::model::Node::defaultOutputPortName, 0) - { - } - - template - BroadcastTernaryFunctionNode::BroadcastTernaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& secondaryInput1, const model::OutputPort& secondaryInput2, size_t dimension, const model::PortMemoryLayout& outputLayout, ValueType paddingValue) : - BroadcastTernaryFunctionNode(primaryInput, inputLayout, secondaryInput1, secondaryInput2, dimension, 
outputLayout, FunctionType{}, paddingValue) - { - } - - template - BroadcastTernaryFunctionNode::BroadcastTernaryFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& secondaryInput1, const model::OutputPort& secondaryInput2, size_t dimension, const model::PortMemoryLayout& outputLayout, FunctionType function, ValueType paddingValue) : - BroadcastFunctionNode({ &_primaryInput, &_secondaryInput1, &_secondaryInput2 }, inputLayout, dimension, { &_output }, outputLayout, function, paddingValue), - _primaryInput(this, primaryInput, primaryInputPortName), - _secondaryInput1(this, secondaryInput1, secondaryInput1PortName), - _secondaryInput2(this, secondaryInput2, secondaryInput2PortName), - _output(this, ell::model::Node::defaultOutputPortName, outputLayout) - { - // Verify sizes are compatible - size_t totalInputSize = inputLayout.GetMemorySize(); - if (primaryInput.Size() < totalInputSize) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Primary input too small"); - } - - if (std::max(secondaryInput1.Size(), secondaryInput2.Size()) != static_cast(inputLayout.GetActiveSize(dimension))) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, std::string("Broadcast vector size doesn't match input") + "_" + std::to_string(secondaryInput1.Size()) + "_" + std::to_string(secondaryInput2.Size()) + "_" + std::to_string(inputLayout.GetActiveSize(dimension))); - } - - if (secondaryInput1.Size() != secondaryInput2.Size() && secondaryInput1.Size() > 0 && secondaryInput2.Size() > 0) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "If present, secondary inputs must have the same size"); - } - - if (inputLayout.GetActiveSize() != outputLayout.GetActiveSize()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "BroadcastFunctionNode: Input and output active area sizes don't match"); - } - } - - template - void BroadcastTernaryFunctionNode::Copy(model::ModelTransformer& transformer) const - { - const auto& primaryInputElements = transformer.GetCorrespondingInputs(_primaryInput); - const auto& secondaryInput1Elements = transformer.GetCorrespondingInputs(_secondaryInput1); - const auto& secondaryInput2Elements = transformer.GetCorrespondingInputs(_secondaryInput2); - auto newNode = transformer.AddNode>(primaryInputElements, - this->GetInputMemoryLayout(), - secondaryInput1Elements, - secondaryInput2Elements, - this->GetBroadcastDimension(), - this->GetOutputMemoryLayout(), - GetFunction()); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void BroadcastTernaryFunctionNode::WriteToArchive(utilities::Archiver& archiver) const - { - BroadcastFunctionNode::WriteToArchive(archiver); - archiver[primaryInputPortName] << _primaryInput; - archiver[secondaryInput1PortName] << _secondaryInput1; - archiver[secondaryInput2PortName] << _secondaryInput2; - } - - template - void BroadcastTernaryFunctionNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - BroadcastFunctionNode::ReadFromArchive(archiver); - archiver[primaryInputPortName] >> _primaryInput; - archiver[secondaryInput1PortName] >> _secondaryInput1; - archiver[secondaryInput2PortName] >> _secondaryInput2; - } - - template - const model::InputPort* BroadcastTernaryFunctionNode::GetSecondaryInput(int index) const - { - assert(index < 2); - if (index == 0) - { - return &secondaryInput1; - } - else if (index == 1) - { - return 
&secondaryInput2; - } - return nullptr; - } - - // - // BroadcastLinearFunctionNode - // - template - BroadcastLinearFunctionNode::BroadcastLinearFunctionNode() : - BroadcastTernaryFunctionNode>() - { - } - - template - BroadcastLinearFunctionNode::BroadcastLinearFunctionNode(const model::OutputPort& primaryInput, const model::PortMemoryLayout& inputLayout, const model::OutputPort& scaleInput, const model::OutputPort& biasInput, size_t dimension, const model::PortMemoryLayout& outputLayout, ValueType paddingValue) : - BroadcastTernaryFunctionNode>(primaryInput, inputLayout, scaleInput, biasInput, dimension, outputLayout, paddingValue) - { - } - - template - void BroadcastLinearFunctionNode::Copy(model::ModelTransformer& transformer) const - { - const auto& primaryInputElements = transformer.GetCorrespondingInputs(primaryInput); - const auto& scaleInputElements = transformer.GetCorrespondingInputs(secondaryInput1); - const auto& biasInputElements = transformer.GetCorrespondingInputs(secondaryInput2); - auto newNode = transformer.AddNode>(primaryInputElements, - this->GetInputMemoryLayout(), - scaleInputElements, - biasInputElements, - this->GetBroadcastDimension(), - this->GetOutputMemoryLayout()); - transformer.MapNodeOutput(output, newNode->output); - } - -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/BufferNode.tcc b/libraries/nodes/tcc/BufferNode.tcc deleted file mode 100644 index 84dbadbc8..000000000 --- a/libraries/nodes/tcc/BufferNode.tcc +++ /dev/null @@ -1,104 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: BufferNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - BufferNode::BufferNode(const model::OutputPort& input, size_t windowSize) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, windowSize), - _windowSize(windowSize) - { - _samples.resize(windowSize); - } - - template - BufferNode::BufferNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0), - _windowSize(0) - { - } - - template - void BufferNode::Compute() const - { - auto inputSize = input.Size(); - if (inputSize > _samples.size()) - { - inputSize = _samples.size(); - } - auto offset = _samples.size() - inputSize; - if (offset > 0) - { - // Copy samples forward to make room for new samples - std::copy_n(_samples.begin() + offset, inputSize, _samples.begin()); - } - // Copy input samples to tail - for (size_t index = 0; index < inputSize; ++index) - { - _samples[index + offset] = _input[index]; - } - _output.SetOutput(_samples); - }; - - template - void BufferNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, _windowSize); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void BufferNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - int inputSize = input.Size(); - size_t windowSize = this->GetWindowSize(); - auto offset = windowSize - inputSize; - - emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); - auto bufferVar = 
function.GetModule().Variables().AddVectorVariable(emitters::VariableScope::global, windowSize); - function.GetModule().AllocateVariable(*bufferVar); - emitters::LLVMValue buffer = function.GetModule().EnsureEmitted(*bufferVar); - - // Copy samples forward to make room for new samples - function.MemoryMove(buffer, offset, 0, inputSize); - - // Copy input samples to tail - function.MemoryCopy(pInput, 0, buffer, offset, inputSize); - - // Copy to output - emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(output); - function.MemoryCopy(buffer, 0, pOutput, 0, windowSize); - } - - template - void BufferNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["windowSize"] << _windowSize; - } - - template - void BufferNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver["windowSize"] >> _windowSize; - - _samples.resize(_windowSize); - _output.SetSize(_windowSize); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/ConcatenationNode.tcc b/libraries/nodes/tcc/ConcatenationNode.tcc deleted file mode 100644 index 5620b7ea1..000000000 --- a/libraries/nodes/tcc/ConcatenationNode.tcc +++ /dev/null @@ -1,93 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ConcatenationNode.tcc (nodes) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - ConcatenationNode::ConcatenationNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0){}; - - template - ConcatenationNode::ConcatenationNode(const model::OutputPort& input) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, input.Size()){}; - - template - ConcatenationNode::ConcatenationNode(const model::OutputPort& input, const model::MemoryShape& shape) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, shape){}; - - template - void ConcatenationNode::Compute() const - { - _output.SetOutput(_input.GetValue()); - } - - template - void ConcatenationNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - assert(GetPortVariableType(_input) == GetPortVariableType(_output)); - - auto inputIsInputNode = (dynamic_cast(_input.GetInputElement(0).ReferencedPort()->GetNode()) != nullptr); - // TODO: re-enable this branch when scalar port bug is fixed - if (_input.Size() != 1 && _output.Size() != 1 && !inputIsInputNode && false) - { - auto pVar = compiler.GetVariableForPort(_input.GetReferencedPort()); - compiler.SetVariableForPort(_output, pVar); - } - else - { - auto input = function.LocalArray(compiler.EnsurePortEmitted(_input)); - auto output = function.LocalArray(compiler.EnsurePortEmitted(_output)); - // check if the output variable is null. 
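For reference, the sliding-window update that BufferNode::Compute and the MemoryMove/MemoryCopy sequence emitted above both perform is a shift-and-append over a fixed-size window. A minimal standalone sketch in plain C++ (the helper name UpdateWindow is hypothetical, not part of this diff):

    #include <algorithm>
    #include <vector>

    // Keep the most recent window.size() samples: shift the surviving older
    // samples to the front, then append the newest input samples at the tail.
    template <typename T>
    void UpdateWindow(std::vector<T>& window, const std::vector<T>& input)
    {
        auto n = std::min(input.size(), window.size()); // number of new samples kept
        auto offset = window.size() - n;
        std::copy(window.begin() + n, window.end(), window.begin()); // shift left by n
        std::copy(input.end() - n, input.end(), window.begin() + offset); // append at tail
    }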
- function.If(ell::emitters::TypedComparison::notEquals, output, function.NullPointer(output.value->getType()->getPointerElementType()->getPointerTo()), [input, output, this](emitters::IRFunctionEmitter& function) { - auto size = _input.Size(); - function.For(size, [input, output](emitters::IRFunctionEmitter& function, auto i) { - output[i] = input[i]; - }); - }); - } - } - - template - void ConcatenationNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, GetShape()); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void ConcatenationNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver[shapeName] << GetShape().ToVector(); - } - - template - void ConcatenationNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - std::vector shapeVector; - archiver[shapeName] >> shapeVector; - _output.SetSize(_input.Size()); - if (shapeVector.size() >= 3) - { - SetShape({ shapeVector }); - } - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/ConstantNode.tcc b/libraries/nodes/tcc/ConstantNode.tcc deleted file mode 100644 index e88169850..000000000 --- a/libraries/nodes/tcc/ConstantNode.tcc +++ /dev/null @@ -1,106 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ConstantNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - // superclass (Node) constructor takes two array arguments: inputs and outputs. These are pointers to our local InputPort and OutputPort storage. 
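To make the comment above concrete: a node's constructor hands the Node base class (here CompilableNode) the addresses of its InputPort/OutputPort members, and that is what wires the ports into the model graph. A hypothetical minimal node showing just the registration pattern (Compute/Compile overrides omitted; a sketch, not part of this diff):

    template <typename ValueType>
    class PassthroughNode : public model::CompilableNode
    {
    public:
        PassthroughNode(const model::OutputPort<ValueType>& input) :
            CompilableNode({ &_input }, { &_output }), // register ports with the base class
            _input(this, input, defaultInputPortName),
            _output(this, defaultOutputPortName, input.Size())
        {
        }

    private:
        model::InputPort<ValueType> _input;   // the local storage the base class points at
        model::OutputPort<ValueType> _output;
    };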
- - // Default constructor - template - ConstantNode::ConstantNode() : - CompilableNode({}, { &_output }), - _output(this, defaultOutputPortName, 0){}; - - // Constructor for a scalar constant - template - ConstantNode::ConstantNode(ValueType value) : - CompilableNode({}, { &_output }), - _output(this, defaultOutputPortName, 1), - _values({ value }){}; - - // Constructor for a vector constant - template - ConstantNode::ConstantNode(const std::vector& values) : - CompilableNode({}, { &_output }), - _output(this, defaultOutputPortName, values.size()), - _values(values){}; - - template - ConstantNode::ConstantNode(const std::vector& values, const model::MemoryShape& shape) : - CompilableNode({}, { &_output }), - _output(this, defaultOutputPortName, shape), - _values(values){}; - - template - ConstantNode::ConstantNode(const std::vector& values, const model::PortMemoryLayout& layout) : - CompilableNode({}, { &_output }), - _output(this, defaultOutputPortName, layout), - _values(values){}; - - template - void ConstantNode::Compute() const - { - _output.SetOutput(_values); - } - - template - void ConstantNode::Copy(model::ModelTransformer& transformer) const - { - auto newNode = transformer.AddNode>(_values, _output.GetMemoryLayout().GetActiveSize()); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void ConstantNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - auto values = this->GetValues(); - emitters::Variable* pVar = nullptr; - pVar = function.GetModule().Variables().AddVariable>(values); - compiler.SetVariableForPort(output, pVar); // Just set the variable corresponding to the output port to be the global variable we created - } - - template - utilities::ArchiveVersion ConstantNode::GetArchiveVersion() const - { - return utilities::ArchiveVersionNumbers::v8_port_memory_layout; - } - - template - bool ConstantNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const - { - return version <= utilities::ArchiveVersionNumbers::v8_port_memory_layout; - } - - template - void ConstantNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver["values"] << _values; - archiver["layout"] << _output.GetMemoryLayout(); - } - - template - void ConstantNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver["values"] >> _values; - if (archiver.HasNextPropertyName("layout")) - { - model::PortMemoryLayout layout; - archiver["layout"] >> layout; - _output.SetMemoryLayout(layout); - } - else - { - _output.SetSize(_values.size()); - } - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/DTWDistanceNode.tcc b/libraries/nodes/tcc/DTWDistanceNode.tcc deleted file mode 100644 index 95656242e..000000000 --- a/libraries/nodes/tcc/DTWDistanceNode.tcc +++ /dev/null @@ -1,265 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: DTWDistanceNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -#include - -namespace ell -{ -namespace nodes -{ - namespace DTWDistanceNodeImpl - { - template - double Variance(const std::vector>& prototype) - { - double sum = 0; // sum(x) - double sumSquares = 0; // sum(x^2) - size_t size = 0; - for (const auto& vec : prototype) - { - size += vec.size(); - for (auto x 
: vec) - { - sum += x; - sumSquares += (x * x); - } - } - return (sumSquares - ((sum * sum) / size)) / size; - } - } // namespace DTWDistanceNodeImpl - - template - DTWDistanceNode::DTWDistanceNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 1), - _sampleDimension(0), - _prototypeLength(0), - _prototypeVariance(0) - { - } - - template - DTWDistanceNode::DTWDistanceNode(const model::OutputPort& input, const std::vector>& prototype) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, 1), - _prototype(prototype) - { - _sampleDimension = input.Size(); - _prototypeLength = prototype.size(); - _d.resize(_prototypeLength + 1); - _s.resize(_prototypeLength + 1); - - _prototypeVariance = DTWDistanceNodeImpl::Variance(_prototype); - // _threshold = std::sqrt(-2 * std::log(confidenceThreshold)) * _prototypeVariance; - Reset(); - } - - template - void DTWDistanceNode::Reset() - { - std::fill(_d.begin() + 1, _d.end(), std::numeric_limits::max()); - _d[0] = 0.0; - std::fill(_s.begin(), _s.end(), 0); - _currentTime = 0; - } - - template - float distance(const std::vector& a, const std::vector& b) - { - T s = 0; - for (size_t index = 0; index < a.size(); index++) - { - s += std::abs(a[index] - b[index]); - } - return static_cast(s); - } - - template - void DTWDistanceNode::Compute() const - { - std::vector input = _input.GetValue(); - auto t = ++_currentTime; - auto dLast = _d[0] = 0; - auto sLast = _s[0] = t; - - ValueType bestDist = 0; - int bestStart = 0; - for (size_t index = 1; index < _prototypeLength + 1; ++index) - { - auto d_iMinus1 = _d[index - 1]; - auto dPrev_iMinus1 = dLast; - auto dPrev_i = _d[index]; - auto s_iMinus1 = _s[index - 1]; - auto sPrev_iMinus1 = sLast; - auto sPrev_i = _s[index]; - - bestDist = d_iMinus1; - bestStart = s_iMinus1; - if (dPrev_i < bestDist) - { - bestDist = dPrev_i; - bestStart = sPrev_i; - } - if (dPrev_iMinus1 < bestDist) - { - bestDist = dPrev_iMinus1; - bestStart = sPrev_iMinus1; - } - bestDist += distance(_prototype[index - 1], input); - - _d[index] = bestDist; - _s[index] = bestStart; - } - assert(bestDist == _d[_prototypeLength]); - assert(bestStart == _s[_prototypeLength]); - auto result = bestDist / _prototypeVariance; - - // Ensure best match is between 80% and 120% of prototype length - auto timeDiff = _currentTime - bestStart; - if (timeDiff < _prototypeLength * 0.8 || timeDiff > _prototypeLength * 1.2) - { - bestDist = std::numeric_limits::max(); - } - - _output.SetOutput({ static_cast(result) }); - }; - - template - void DTWDistanceNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newinput = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newinput, _prototype); - transformer.MapNodeOutput(output, newNode->output); - } - - template - std::vector DTWDistanceNode::GetPrototypeData() const - { - std::vector result; - result.reserve(_prototypeLength * _sampleDimension); - - for (const auto& vec : _prototype) - { - result.insert(result.end(), vec.begin(), vec.end()); - } - return result; - } - - template - void DTWDistanceNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - static_assert(!std::is_same(), "Cannot instantiate boolean DTW nodes"); - - auto inputType = GetPortVariableType(_input); - assert(inputType == GetPortVariableType(_output)); - VerifyIsScalar(_output); - - auto input = 
function.LocalArray(compiler.EnsurePortEmitted(_input)); - auto result = compiler.EnsurePortEmitted(_output); - - // The prototype (constant) - emitters::Variable* pVarPrototype = function.GetModule().Variables().AddVariable>(GetPrototypeData()); - - // Global variables for the dynamic programming memory - emitters::Variable* pVarD = function.GetModule().Variables().AddVariable>(emitters::VariableScope::global, _prototypeLength + 1); - - // get global state vars - auto prototypeVector = function.LocalArray(function.GetModule().EnsureEmitted(*pVarPrototype)); - auto pD = function.LocalArray(function.GetModule().EnsureEmitted(*pVarD)); - - // incorrect usage of function.Variable --- should use IRModuleEmitter::EmitX(variable) - auto dist = function.Variable(inputType, "dist"); - auto protoIndex = function.Variable(emitters::VariableType::Int32, "i"); - auto dLast = function.Variable(inputType, "dLast"); - auto bestDist = function.Variable(inputType, "bestDist"); - - // initialize variables - function.StoreZero(protoIndex); - function.StoreZero(dLast); - - function.For(_prototypeLength, [pD, dLast, bestDist, dist, protoIndex, input, prototypeVector, this](emitters::IRFunctionEmitter& function, emitters::IRLocalScalar iMinusOne) { - auto i = iMinusOne + 1; - auto d_iMinus1 = pD[iMinusOne]; - auto dPrev_iMinus1 = function.LocalScalar(function.Load(dLast)); - auto dPrev_i = pD[i]; - - function.Store(bestDist, static_cast(d_iMinus1)); - - function.If(dPrev_i < d_iMinus1, [bestDist, dPrev_i](auto& function) { - function.Store(bestDist, static_cast(dPrev_i)); - }); - - function.If(dPrev_iMinus1 < function.Load(bestDist), [bestDist, dPrev_iMinus1](auto& function) { - function.Store(bestDist, dPrev_iMinus1); - }); - - // Get dist - function.StoreZero(dist); - function.For(_sampleDimension, [dist, protoIndex, input, prototypeVector](emitters::IRFunctionEmitter& function, auto j) { - auto inputValue = input[j]; - auto protoValue = prototypeVector[function.LocalScalar(function.Load(protoIndex))]; - auto absDiff = emitters::Abs(inputValue - protoValue); - function.OperationAndUpdate(dist, emitters::GetAddForValueType(), absDiff); - function.OperationAndUpdate(protoIndex, emitters::TypedOperator::add, function.Literal(1)); - }); - - function.OperationAndUpdate(bestDist, emitters::GetAddForValueType(), function.Load(dist)); // x += dist; - pD[i] = function.Load(bestDist); // d[i] = x; - }); - - function.Store(result, function.Load(bestDist) / function.LocalScalar(_prototypeVariance)); - } - - template - void DTWDistanceNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver[defaultOutputPortName] << _output; - // Since we know the prototype will always be rectangular, we - // archive it as a matrix here. 
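Both DTWDistanceNode::Compute and the loop emitted by Compile above implement the same one-column dynamic-programming recurrence: d[i] is updated from min(d[i-1], dPrev[i], dPrev[i-1]) plus the L1 distance between the current sample and prototype step i-1, and the final cost is normalized by the prototype variance. A standalone sketch of one time step (the name DtwStep is hypothetical):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // Fold one input sample into the running DTW costs 'd'
    // (d.size() == proto.size() + 1, with d[0] == 0).
    template <typename T>
    T DtwStep(const std::vector<std::vector<T>>& proto, std::vector<T>& d, const std::vector<T>& sample)
    {
        T dPrevIMinus1 = d[0]; // d[i-1] from the previous time step
        for (size_t i = 1; i <= proto.size(); ++i)
        {
            T dPrevI = d[i]; // d[i] from the previous time step
            T best = std::min({ d[i - 1], dPrevI, dPrevIMinus1 });
            T dist = 0; // L1 distance to prototype step i-1
            for (size_t j = 0; j < sample.size(); ++j)
            {
                dist += std::abs(sample[j] - proto[i - 1][j]);
            }
            dPrevIMinus1 = dPrevI;
            d[i] = best + dist;
        }
        return d[proto.size()]; // unnormalized; divide by the prototype variance as above
    }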
- auto numRows = _prototype.size(); - auto numColumns = _prototype[0].size(); - std::vector elements; - elements.reserve(numRows * numColumns); - for (const auto& row : _prototype) - { - elements.insert(elements.end(), row.begin(), row.end()); - } - archiver["prototype_rows"] << numRows; - archiver["prototype_columns"] << numColumns; - math::Matrix temp(numRows, numColumns, elements); - math::MatrixArchiver::Write(temp, "prototype", archiver); - } - - template - void DTWDistanceNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver[defaultOutputPortName] >> _output; - size_t numRows; - size_t numColumns; - archiver["prototype_rows"] >> numRows; - archiver["prototype_columns"] >> numColumns; - math::Matrix temp(numRows, numColumns); - math::MatrixArchiver::Read(temp, "prototype", archiver); - for (size_t i = 0; i < numRows; i++) - { - _prototype.emplace_back(temp.GetRow(i).ToArray()); - } - _prototypeLength = _prototype.size(); - _d.resize(_prototypeLength + 1); - _s.resize(_prototypeLength + 1); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/DebugSinkNode.tcc b/libraries/nodes/tcc/DebugSinkNode.tcc deleted file mode 100644 index 9a45b3c20..000000000 --- a/libraries/nodes/tcc/DebugSinkNode.tcc +++ /dev/null @@ -1,109 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: DebugSinkNode.tcc (nodes) -// Authors: Chris Lovett -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -namespace ell -{ -namespace nodes -{ - template - DebugSinkNode::DebugSinkNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0), - _userData(nullptr) - { - } - - template - DebugSinkNode::DebugSinkNode(const model::OutputPort& input, DebugSinkFunction sink, const std::string& label, void* userData, const std::string& sinkFunctionName) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, _input.Size()), - _label(label), - _userData(userData), - _sinkFunctionName(sinkFunctionName), - _sink(std::move(sink)) - { - } - - template - bool DebugSinkNode::ShouldCompileInline() const - { - return true; - } - - template - void DebugSinkNode::Compute() const - { - DEBUG_THROW(_sink == nullptr, utilities::InputException(utilities::InputExceptionErrors::nullReference, "Sink function is not set")); - - auto result = EvaluateInput(); - if (result && _sink != nullptr) - { - _sink(_label, _input.GetValue(), _userData); - } - _output.SetOutput(_input.GetValue()); - } - - template - void DebugSinkNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); - auto userData = function.Pointer((char*)_userData); - - // EvaluateInput defaults to 'pass through' in base implementation, which means - // we always call the sink function - const emitters::NamedVariableTypeList parameters = { { "label", emitters::GetVariableType() }, - { "output", emitters::GetPointerType(emitters::GetVariableType()) }, - { "userData", emitters::GetVariableType() } }; - - // Callback signature: void DebugSinkNode(char* label, ValueType* array, char* userData) - 
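On the Compute path the sink is an ordinary std::function, so hooking up a debug printer only requires a callable whose parameters match the (label, values, userData) call in Compute above. A sketch, assuming double-valued ports (the lambda and its usage line are illustrative, not part of this diff):

    #include <iostream>
    #include <string>
    #include <vector>

    // Print the label and the first few values delivered to the sink.
    auto debugPrinter = [](const std::string& label, const std::vector<double>& values, void* /*userData*/) {
        std::cout << label << ":";
        for (size_t i = 0; i < values.size() && i < 8; ++i)
        {
            std::cout << " " << values[i];
        }
        std::cout << "\n";
    };
    // e.g.: DebugSinkNode<double> node(somePort, debugPrinter, "layer1", nullptr, "DebugOutput");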
function.GetModule().DeclareFunction(_sinkFunctionName, emitters::VariableType::Void, parameters); - emitters::LLVMFunction pSinkFunction = function.GetModule().GetFunction(_sinkFunctionName); - function.Call(pSinkFunction, { function.Literal(_label), function.PointerOffset(pInput, function.Literal(0)), userData }); - - // Tag the sink function as a callback that is emitted in headers - function.IncludeInHeader(); - } - - template - void DebugSinkNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, _sink, _label, _userData, _sinkFunctionName); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void DebugSinkNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["sinkFunctionName"] << _sinkFunctionName; - } - - template - void DebugSinkNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver["sinkFunctionName"] >> _sinkFunctionName; - // _sink needs to be set separately - } - - template - bool DebugSinkNode::EvaluateInput() const - { - // Default pass through (derived classes will override). - return true; - } -}; // namespace nodes -} // namespace ell \ No newline at end of file diff --git a/libraries/nodes/tcc/DelayNode.tcc b/libraries/nodes/tcc/DelayNode.tcc deleted file mode 100644 index 2890ec75a..000000000 --- a/libraries/nodes/tcc/DelayNode.tcc +++ /dev/null @@ -1,102 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: DelayNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - DelayNode::DelayNode(const model::OutputPort& input, size_t windowSize) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, _input.Size()), - _windowSize(windowSize) - { - auto dimension = input.Size(); - for (size_t index = 0; index < windowSize; ++index) - { - _samples.push_back(std::vector(dimension)); - } - } - - template - DelayNode::DelayNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0), - _windowSize(0) - { - } - - template - void DelayNode::Compute() const - { - auto lastBufferedSample = _samples[0]; - _samples.push_back(_input.GetValue()); - _samples.erase(_samples.begin()); - _output.SetOutput(lastBufferedSample); - }; - - template - void DelayNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, _windowSize); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void DelayNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue result = compiler.EnsurePortEmitted(output); - - size_t sampleSize = output.Size(); - size_t windowSize = this->GetWindowSize(); - size_t bufferSize = sampleSize * windowSize; - - // - // Delay nodes are always long lived - either globals or heap. 
Currently, we use globals
-        // Each sample chunk is of size == sampleSize. The number of chunks we hold onto == windowSize
-        // We need two buffers - one for the entire lot, one for the "last" chunk forwarded to the next operator
-        //
-        emitters::Variable* delayLineVar = function.GetModule().Variables().AddVariable<emitters::VectorVariable<ValueType>>(emitters::VariableScope::global, bufferSize);
-        emitters::LLVMValue delayLine = function.GetModule().EnsureEmitted(*delayLineVar);
-
-        //
-        // We implement a delay as a Shift Register
-        //
-        emitters::LLVMValue inputBuffer = compiler.EnsurePortEmitted(input);
-        function.ShiftAndUpdate<ValueType>(delayLine, bufferSize, sampleSize, inputBuffer, result);
-    }
-
-    template <typename ValueType>
-    void DelayNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
-    {
-        Node::WriteToArchive(archiver);
-        archiver[defaultInputPortName] << _input;
-        archiver["windowSize"] << _windowSize;
-    }
-
-    template <typename ValueType>
-    void DelayNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
-    {
-        Node::ReadFromArchive(archiver);
-        archiver[defaultInputPortName] >> _input;
-        archiver["windowSize"] >> _windowSize;
-
-        auto dimension = _input.Size();
-        _samples.clear();
-        _samples.reserve(_windowSize);
-        for (size_t index = 0; index < _windowSize; ++index)
-        {
-            _samples.push_back(std::vector<ValueType>(dimension));
-        }
-        _output.SetSize(dimension);
-    }
-} // namespace nodes
-} // namespace ell
diff --git a/libraries/nodes/tcc/DemultiplexerNode.tcc b/libraries/nodes/tcc/DemultiplexerNode.tcc
deleted file mode 100644
index aa0d26a30..000000000
--- a/libraries/nodes/tcc/DemultiplexerNode.tcc
+++ /dev/null
@@ -1,116 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     DemultiplexerNode.tcc (nodes)
-//  Authors:  Chuck Jacobs
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace ell
-{
-namespace nodes
-{
-    template <typename ValueType, typename SelectorType>
-    DemultiplexerNode<ValueType, SelectorType>::DemultiplexerNode() :
-        Node({ &_input, &_selector }, { &_output }),
-        _input(this, {}, defaultInputPortName),
-        _selector(this, {}, selectorPortName),
-        _output(this, defaultOutputPortName, 0),
-        _defaultValue(0)
-    {
-    }
-
-    template <typename ValueType, typename SelectorType>
-    DemultiplexerNode<ValueType, SelectorType>::DemultiplexerNode(const model::OutputPort<ValueType>& input, const model::OutputPort<SelectorType>& selector, size_t outputSize, ValueType defaultValue) :
-        Node({ &_input, &_selector }, { &_output }),
-        _input(this, input, defaultInputPortName),
-        _selector(this, selector, selectorPortName),
-        _output(this, defaultOutputPortName, outputSize),
-        _defaultValue(defaultValue)
-    {
-        if (selector.Size() != 1)
-        {
-            throw ell::utilities::Exception("Error: Condition must be 1-D signal");
-        }
-        if (input.Size() != 1)
-        {
-            throw ell::utilities::Exception("Error: Input must be 1-D signal");
-        }
-    }
-
-    template <typename ValueType, typename SelectorType>
-    void DemultiplexerNode<ValueType, SelectorType>::Compute() const
-    {
-        std::vector<ValueType> outputValue(_output.Size(), _defaultValue);
-        int index = (int)_selector[0];
-        outputValue[index] = _input[0];
-        _output.SetOutput(outputValue);
-    }
-
-    template <typename ValueType, typename SelectorType>
-    void DemultiplexerNode<ValueType, SelectorType>::WriteToArchive(utilities::Archiver& archiver) const
-    {
-        Node::WriteToArchive(archiver);
-        archiver[defaultInputPortName] << _input;
-        archiver[selectorPortName] << _selector;
-        archiver["size"] << _output.Size();
-        archiver["defaultValue"] << _defaultValue;
-    }
-
-    template <typename ValueType, typename SelectorType>
-    void DemultiplexerNode<ValueType, SelectorType>::ReadFromArchive(utilities::Unarchiver& archiver)
-    {
-        Node::ReadFromArchive(archiver);
-        archiver[defaultInputPortName] >> _input;
-        archiver[selectorPortName] >> _selector;
-        size_t
size; - archiver["size"] >> size; - _output.SetSize(size); - archiver["defaultValue"] >> _defaultValue; - } - - template - void DemultiplexerNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newInput = transformer.GetCorrespondingInputs(_input); - const auto& newSelector = transformer.GetCorrespondingInputs(_selector); - auto newNode = transformer.AddNode>(newInput, newSelector, output.Size(), _defaultValue); - transformer.MapNodeOutput(output, newNode->output); - } - - template - const model::OutputPort& CastIfNecessary(const model::OutputPort& values, model::ModelTransformer& transformer) - { - auto castNode = transformer.AddNode>(values); - return castNode->output; - } - - template <> - inline const model::OutputPort& CastIfNecessary(const model::OutputPort& values, model::ModelTransformer& transformer) - { - return values; - } - - template - bool DemultiplexerNode::Refine(model::ModelTransformer& transformer) const - { - const auto& newInput = transformer.GetCorrespondingInputs(_input); - const auto& newSelector = transformer.GetCorrespondingInputs(_selector); - const auto& newSelectorInt = CastIfNecessary(newSelector, transformer); - - auto defaultNode = transformer.AddNode>(_defaultValue); - model::PortElements outputElements; - auto size = _output.Size(); - for (size_t index = 0; index < size; ++index) - { - auto indexNode = transformer.AddNode>(static_cast(index)); - auto isEqualNode = transformer.AddNode>(newSelectorInt, indexNode->output, emitters::BinaryPredicateType::equal); - auto ifNode = transformer.AddNode>(model::PortElements{ defaultNode->output, newInput }, isEqualNode->output); - outputElements.Append(ifNode->output); - } - - transformer.MapNodeOutput(output, outputElements); - return true; - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/DotProductNode.tcc b/libraries/nodes/tcc/DotProductNode.tcc deleted file mode 100644 index 883ca226a..000000000 --- a/libraries/nodes/tcc/DotProductNode.tcc +++ /dev/null @@ -1,126 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: DotProductNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - DotProductNode::DotProductNode() : - CompilableNode({ &_input1, &_input2 }, { &_output }), - _input1(this, {}, defaultInput1PortName), - _input2(this, {}, defaultInput2PortName), - _output(this, defaultOutputPortName, 1) - { - } - - template - DotProductNode::DotProductNode(const model::OutputPort& input1, const model::OutputPort& input2) : - CompilableNode({ &_input1, &_input2 }, { &_output }), - _input1(this, input1, defaultInput1PortName), - _input2(this, input2, defaultInput2PortName), - _output(this, defaultOutputPortName, 1) - { - } - - template - void DotProductNode::Compute() const - { - ValueType result = 0; - for (size_t index = 0; index < _input1.Size(); ++index) - { - result += _input1[index] * _input2[index]; - } - _output.SetOutput({ result }); - }; - - template - void DotProductNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newInput1 = transformer.GetCorrespondingInputs(_input1); - const auto& newInput2 = transformer.GetCorrespondingInputs(_input2); - auto newNode = transformer.AddNode>(newInput1, newInput2); - transformer.MapNodeOutput(output, newNode->output); - } - - template - bool 
DotProductNode::Refine(model::ModelTransformer& transformer) const - { - // Maybe... in reality, dot product will likely want to be computed as in Compute() above - const auto& newInput1 = transformer.GetCorrespondingInputs(_input1); - const auto& newInput2 = transformer.GetCorrespondingInputs(_input2); - auto multNode = transformer.AddNode>(newInput1, newInput2, emitters::BinaryOperationType::coordinatewiseMultiply); - auto sumNode = transformer.AddNode>(multNode->output); - - transformer.MapNodeOutput(output, sumNode->output); - return true; - } - - template - void DotProductNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - static_assert(!std::is_same(), "Cannot instantiate boolean dot product nodes"); - if (!compiler.GetCompilerOptions().unrollLoops) - { - CompileDotProductLoop(compiler, function); - } - else - { - CompileDotProductExpanded(compiler, function); - } - } - - template - void DotProductNode::CompileDotProductLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pLVector = compiler.EnsurePortEmitted(input1); - emitters::LLVMValue pRVector = compiler.EnsurePortEmitted(input2); - int count = static_cast(input1.Size()); - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - if (compiler.GetCompilerOptions().inlineOperators) - { - function.DotProduct(count, pLVector, pRVector, pResult); - } - else - { - function.Call(function.GetModule().GetRuntime().GetDotProductFunction(), { function.Literal(count), function.PointerOffset(pLVector, 0), function.PointerOffset(pRVector, 0), function.PointerOffset(pResult, 0) }); - } - } - - template - void DotProductNode::CompileDotProductExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - - function.StoreZero(pResult); - for (size_t i = 0; i < input1.Size(); ++i) - { - emitters::LLVMValue pLeftValue = compiler.LoadPortElementVariable(input1.GetInputElement(i)); - emitters::LLVMValue pRightValue = compiler.LoadPortElementVariable(input2.GetInputElement(i)); - emitters::LLVMValue pMultiplyResult = function.Operator(emitters::GetMultiplyForValueType(), pLeftValue, pRightValue); - function.OperationAndUpdate(pResult, emitters::GetAddForValueType(), pMultiplyResult); - } - } - - template - void DotProductNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInput1PortName] << _input1; - archiver[defaultInput2PortName] << _input2; - } - - template - void DotProductNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInput1PortName] >> _input1; - archiver[defaultInput2PortName] >> _input2; - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/ExtremalValueNode.tcc b/libraries/nodes/tcc/ExtremalValueNode.tcc deleted file mode 100644 index e2c89de40..000000000 --- a/libraries/nodes/tcc/ExtremalValueNode.tcc +++ /dev/null @@ -1,186 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ExtremalValueNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - ExtremalValueNode::ExtremalValueNode() : - CompilableNode({ &_input }, { &_val, &_argVal }), - 
_input(this, {}, inputPortName),
-        _val(this, valPortName, 1),
-        _argVal(this, argValPortName, 1)
-    {
-    }
-
-    template <typename ValueType, bool max>
-    ExtremalValueNode<ValueType, max>::ExtremalValueNode(const model::OutputPort<ValueType>& input) :
-        CompilableNode({ &_input }, { &_val, &_argVal }),
-        _input(this, input, inputPortName),
-        _val(this, valPortName, 1),
-        _argVal(this, argValPortName, 1)
-    {
-    }
-
-    template <typename ValueType, bool max>
-    std::string ExtremalValueNode<ValueType, max>::GetTypeName()
-    {
-        if (max)
-        {
-            return utilities::GetCompositeTypeName<ValueType, std::true_type>("ExtremalValueNode");
-        }
-        else
-        {
-            return utilities::GetCompositeTypeName<ValueType, std::false_type>("ExtremalValueNode");
-        }
-    }
-
-    template <typename ValueType, bool max>
-    void ExtremalValueNode<ValueType, max>::Compute() const
-    {
-        auto inputValues = _input.GetValue();
-        decltype(std::max_element(inputValues.begin(), inputValues.end())) result;
-        if (max)
-        {
-            result = std::max_element(inputValues.begin(), inputValues.end());
-        }
-        else
-        {
-            result = std::min_element(inputValues.begin(), inputValues.end());
-        }
-        auto val = *result;
-        auto index = result - inputValues.begin();
-        _val.SetOutput({ val });
-        _argVal.SetOutput({ (int)index });
-    };
-
-    template <typename ValueType, bool max>
-    emitters::TypedComparison ExtremalValueNode<ValueType, max>::GetComparison() const
-    {
-        if (IsMaxNode())
-        {
-            return emitters::GetComparison<ValueType>(emitters::BinaryPredicateType::greater);
-        }
-        else
-        {
-            return emitters::GetComparison<ValueType>(emitters::BinaryPredicateType::less);
-        }
-    }
-
-    template <typename ValueType, bool max>
-    void ExtremalValueNode<ValueType, max>::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
-    {
-        VerifyIsScalar(val);
-        VerifyIsScalar(argVal);
-        if (!compiler.GetCompilerOptions().unrollLoops)
-        {
-            CompileLoop(compiler, function);
-        }
-        else
-        {
-            CompileExpanded(compiler, function);
-        }
-    }
-
-    template <typename ValueType, bool max>
-    void ExtremalValueNode<ValueType, max>::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
-    {
-        emitters::LLVMValue inputVal = compiler.EnsurePortEmitted(input);
-        emitters::LLVMValue outVal = compiler.EnsurePortEmitted(val);
-        emitters::LLVMValue outArgVal = compiler.EnsurePortEmitted(argVal);
-        auto inputType = GetPortVariableType(input);
-        auto numInputs = input.Size();
-
-        emitters::LLVMValue bestVal = function.Variable(inputType, "bestVal");
-        emitters::LLVMValue bestIndex = function.Variable(ell::emitters::VariableType::Int32, "bestArgVal");
-
-        auto val0 = function.ValueAt(inputVal, function.Literal(0));
-        function.Store(bestVal, val0);
-        function.StoreZero(bestIndex);
-
-        function.For(1, numInputs, 1, [inputVal, bestVal, bestIndex, this](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) {
-            auto val = function.ValueAt(inputVal, i);
-            function.If(GetComparison(), val, function.Load(bestVal), [bestVal, bestIndex, val, i](auto& function) {
-                function.Store(bestVal, val);
-                function.Store(bestIndex, i);
-            });
-        });
-
-        function.Store(outVal, function.Load(bestVal));
-        function.Store(outArgVal, function.Load(bestIndex));
-    }
-
-    template <typename ValueType, bool max>
-    void ExtremalValueNode<ValueType, max>::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function)
-    {
-        emitters::LLVMValue outVal = compiler.EnsurePortEmitted(val);
-        emitters::LLVMValue outArgVal = compiler.EnsurePortEmitted(argVal);
-        auto inputType = GetPortVariableType(input);
-        auto numInputs = input.Size();
-
-        emitters::LLVMValue bestVal = function.Variable(inputType, "bestVal");
-        emitters::LLVMValue bestIndex = function.Variable(ell::emitters::VariableType::Int32, "bestArgVal");
-
-        emitters::LLVMValue val0 = compiler.LoadPortElementVariable(input.GetInputElement(0));
-        function.Store(bestVal, val0);
-        function.StoreZero(bestIndex);
-
-        for (size_t i =
1; i < numInputs; ++i) - { - emitters::LLVMValue val = compiler.LoadPortElementVariable(input.GetInputElement(i)); - function.If(GetComparison(), val, function.Load(bestVal), [bestVal, bestIndex, val, i](auto& function) { - function.Store(bestVal, val); - function.Store(bestIndex, function.Literal(static_cast(i))); - }); - } - - function.Store(outVal, function.Load(bestVal)); - function.Store(outArgVal, function.Load(bestIndex)); - } - - template - void ExtremalValueNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[inputPortName] << _input; - archiver[valPortName] << _val; - archiver[argValPortName] << _argVal; - } - - template - void ExtremalValueNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[inputPortName] >> _input; - archiver[valPortName] >> _val; - archiver[argValPortName] >> _argVal; - } - - // - // Copy definitions for subclasses - // - template - void ArgMinNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(this->_input); - auto newNode = transformer.AddNode>(newPortElements); - transformer.MapNodeOutput(this->val, newNode->val); - transformer.MapNodeOutput(this->argVal, newNode->argVal); - } - - template - void ArgMaxNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(this->_input); - auto newNode = transformer.AddNode>(newPortElements); - transformer.MapNodeOutput(this->val, newNode->val); - transformer.MapNodeOutput(this->argVal, newNode->argVal); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/ForestPredictorNode.tcc b/libraries/nodes/tcc/ForestPredictorNode.tcc deleted file mode 100644 index da65835b1..000000000 --- a/libraries/nodes/tcc/ForestPredictorNode.tcc +++ /dev/null @@ -1,185 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ForestPredictorNode.tcc (nodes) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - ForestPredictorNode::ForestPredictorNode(const model::OutputPort& input, const predictors::ForestPredictor& forest) : - Node({ &_input }, { &_output, &_treeOutputs, &_edgeIndicatorVector }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, 1), - _treeOutputs(this, treeOutputsPortName, forest.NumTrees()), - _edgeIndicatorVector(this, edgeIndicatorVectorPortName, forest.NumEdges()), - _forest(forest) - { - } - - template - ForestPredictorNode::ForestPredictorNode() : - Node({ &_input }, { &_output, &_treeOutputs, &_edgeIndicatorVector }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 1), - _treeOutputs(this, treeOutputsPortName, 0), - _edgeIndicatorVector(this, edgeIndicatorVectorPortName, 0) - { - } - - template - void ForestPredictorNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["forest"] << _forest; - } - - template - void ForestPredictorNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver["forest"] >> _forest; - - _treeOutputs.SetSize(_forest.NumTrees()); - 
_edgeIndicatorVector.SetSize(_forest.NumEdges()); - } - - template - void ForestPredictorNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, _forest); - transformer.MapNodeOutput(output, newNode->output); - transformer.MapNodeOutput(treeOutputs, newNode->treeOutputs); - transformer.MapNodeOutput(edgeIndicatorVector, newNode->edgeIndicatorVector); - } - - template - bool ForestPredictorNode::Refine(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - const auto& interiorNodes = _forest.GetInteriorNodes(); - - // create a place to store references to the output ports of the sub-models at each interior node - std::vector> interiorNodeSplitIndicators(interiorNodes.size()); - std::vector> interiorNodeSubModels(interiorNodes.size()); - - // visit interior nodes bottom-up (in reverse topological order) - for (int nodeIndex = static_cast(interiorNodes.size()) - 1; nodeIndex >= 0; --nodeIndex) // Note: index var must be signed or else end condition is never met - { - const auto& edges = interiorNodes[nodeIndex].GetOutgoingEdges(); - - // get the sub-model that represents each outgoing edge - model::PortElements edgeOutputs; - for (size_t j = 0; j < edges.size(); ++j) - { - const auto& edgePredictor = edges[j].GetPredictor(); - auto edgePredictorNode = AddNodeToModelTransformer(newPortElements, edgePredictor, transformer); - - if (edges[j].IsTargetInterior()) // target node is itself an interior node: reverse topological order guarantees that it's already visited - { - model::PortElements elements = interiorNodeSubModels[edges[j].GetTargetNodeIndex()]; - - auto sumNode = transformer.AddNode>(edgePredictorNode->output, elements, emitters::BinaryOperationType::add); - edgeOutputs.Append(sumNode->output); - } - else // target node is a leaf - { - edgeOutputs.Append(edgePredictorNode->output); - } - } - - // add the sub-model that computes the split rule - auto splitRuleNode = AddNodeToModelTransformer(newPortElements, interiorNodes[nodeIndex].GetSplitRule(), transformer); - interiorNodeSplitIndicators[nodeIndex] = { splitRuleNode->output }; - - // ...and selects the output value - auto selectorNode = transformer.AddNode>(edgeOutputs, splitRuleNode->output); - interiorNodeSubModels[nodeIndex] = { selectorNode->output }; - } - - // Now compute the edge indicator vector - auto trueNode = transformer.AddNode>(true); // the constant 'true' - std::vector> edgeIndicatorSubModels(_forest.NumEdges()); - - // Vector with index of the incoming edge for each internal node (with sentinel value of -1 for tree roots) - std::vector incomingEdgeIndices(interiorNodes.size(), -1); - for (size_t nodeIndex = 0; nodeIndex < interiorNodes.size(); ++nodeIndex) - { - auto parentEdgeIndex = incomingEdgeIndices[nodeIndex]; - auto isRoot = parentEdgeIndex == -1; - const auto& edgeSelector = interiorNodeSplitIndicators[nodeIndex]; - const auto& node = interiorNodes[nodeIndex]; - const auto& childEdges = node.GetOutgoingEdges(); - auto numChildren = childEdges.size(); - model::PortElements parentIndicator = isRoot ? trueNode->output : edgeIndicatorSubModels[parentEdgeIndex]; - - // The Demultiplexer node computes the indicator value for all the children at once, by copying its input value (a '1' if it's the root) - // to the selected child. 
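The demultiplexer trick in the comment above amounts to a logical AND: a child edge is on the evaluation path exactly when its parent edge is on the path and the split rule selected that child. In scalar form (PropagateIndicator is a hypothetical helper, not part of this diff):

    #include <vector>

    // Copy the parent edge's indicator to the child chosen by the split rule;
    // all other children get 'false'.
    inline void PropagateIndicator(bool parentIndicator, int selector, std::vector<bool>& childIndicators)
    {
        for (int child = 0; child < static_cast<int>(childIndicators.size()); ++child)
        {
            childIndicators[child] = parentIndicator && (child == selector);
        }
    }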
- auto muxNode = transformer.AddNode>(parentIndicator, edgeSelector, numChildren); - for (size_t edgePosition = 0; edgePosition < numChildren; ++edgePosition) - { - auto edgeIndex = node.GetFirstEdgeIndex() + edgePosition; - model::PortElements childOut = { muxNode->output, edgePosition }; - edgeIndicatorSubModels[edgeIndex] = childOut; - - // If this edge's target node has an outgoing edge, record ourself as its parent - if (childEdges[edgePosition].IsTargetInterior()) - { - auto childNode = childEdges[edgePosition].GetTargetNodeIndex(); - incomingEdgeIndices[childNode] = static_cast(edgeIndex); - } - } - } - // collect the individual entries for the indicator vector into a single PortElements object - model::PortElements edgeIndicatorVectorElements(edgeIndicatorSubModels); - - // collect the sub-models that represent the trees of the forest - model::PortElements treeSubModels; - for (size_t rootIndex : _forest.GetRootIndices()) - { - treeSubModels.Append(interiorNodeSubModels[rootIndex]); - } - - // Make a copy and add the bias term - auto treesPlusBias = treeSubModels; - auto biasNode = transformer.AddNode>(_forest.GetBias()); - treesPlusBias.Append(biasNode->output); - - // Sum all of the trees - auto sumNode = transformer.AddNode>(treesPlusBias); - - // Map all the outputs from the original node to the refined model outputs - transformer.MapNodeOutput(output, sumNode->output); - transformer.MapNodeOutput(treeOutputs, treeSubModels); - transformer.MapNodeOutput(edgeIndicatorVector, edgeIndicatorVectorElements); - return true; - } - - template - void ForestPredictorNode::Compute() const - { - // forest output - auto inputDataVector = typename ForestPredictor::DataVectorType(_input.GetValue()); - _output.SetOutput({ _forest.Predict(inputDataVector) }); - - // individual tree outputs - std::vector treeOutputs(_forest.NumTrees()); - for (size_t i = 0; i < _forest.NumTrees(); ++i) - { - treeOutputs[i] = _forest.Predict(inputDataVector, _forest.GetRootIndex(i)); - } - _treeOutputs.SetOutput(std::move(treeOutputs)); - - // path indicator - auto edgeIndicator = _forest.GetEdgeIndicatorVector(inputDataVector); - _edgeIndicatorVector.SetOutput(std::move(edgeIndicator)); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/HammingWindowNode.tcc b/libraries/nodes/tcc/HammingWindowNode.tcc deleted file mode 100644 index d013a2a9f..000000000 --- a/libraries/nodes/tcc/HammingWindowNode.tcc +++ /dev/null @@ -1,75 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: HammingWindowNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - HammingWindowNode::HammingWindowNode() : - Node({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0) - { - } - - template - HammingWindowNode::HammingWindowNode(const model::OutputPort& input) : - Node({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, input.Size()) - { - } - - template - void HammingWindowNode::Compute() const - { - auto size = _input.Size(); - auto window = dsp::HammingWindow(size); - auto result = std::vector(size); - for (size_t index = 0; index < size; index++) - { - result[index] = _input[index] * window[index]; - } - _output.SetOutput(result); - } - 
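For context, the window vector multiplied into the input above comes from dsp::HammingWindow. A minimal stand-in sketch follows, assuming the conventional 0.54/0.46 Hamming coefficients; MakeHammingWindow is a hypothetical name for illustration, not ELL's actual dsp API:

#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for dsp::HammingWindow<ValueType>(size):
// w[n] = 0.54 - 0.46 * cos(2*pi*n / (N - 1))
template <typename ValueType>
std::vector<ValueType> MakeHammingWindow(size_t size)
{
    std::vector<ValueType> window(size);
    if (size == 1)
    {
        window[0] = static_cast<ValueType>(1); // degenerate single-sample window
        return window;
    }
    constexpr double pi = 3.14159265358979323846;
    for (size_t n = 0; n < size; ++n)
    {
        window[n] = static_cast<ValueType>(0.54 - 0.46 * std::cos(2.0 * pi * n / (size - 1)));
    }
    return window;
}

Compute multiplies each input sample by window[index], which is exactly what the Refine implementation below reproduces with a ConstantNode and a coordinatewiseMultiply BinaryOperationNode.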
- template <typename ValueType> - void HammingWindowNode<ValueType>::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode<HammingWindowNode<ValueType>>(newPortElements); - transformer.MapNodeOutput(output, newNode->output); - } - - template <typename ValueType> - bool HammingWindowNode<ValueType>::Refine(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto constantNode = transformer.AddNode<ConstantNode<ValueType>>(dsp::HammingWindow<ValueType>(_input.Size())); - auto multiplyNode = transformer.AddNode<BinaryOperationNode<ValueType>>(newPortElements, constantNode->output, emitters::BinaryOperationType::coordinatewiseMultiply); - transformer.MapNodeOutput(output, multiplyNode->output); - return true; - } - - template <typename ValueType> - void HammingWindowNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - } - - template <typename ValueType> - void HammingWindowNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - _output.SetSize(_input.Size()); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/L2NormSquaredNode.tcc b/libraries/nodes/tcc/L2NormSquaredNode.tcc deleted file mode 100644 index d96838ddb..000000000 --- a/libraries/nodes/tcc/L2NormSquaredNode.tcc +++ /dev/null @@ -1,75 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: L2NormSquaredNode.tcc (nodes) -// Authors: Kern Handa -// -/////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template <typename ValueType> - L2NormSquaredNode<ValueType>::L2NormSquaredNode() : - Node({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 1) - { - } - - template <typename ValueType> - L2NormSquaredNode<ValueType>::L2NormSquaredNode(const model::OutputPort<ValueType>& input) : - Node({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, 1) - { - } - - template <typename ValueType> - void L2NormSquaredNode<ValueType>::Compute() const - { - ValueType result = 0; - for (size_t index = 0; index < _input.Size(); ++index) - { - auto v = _input[index]; - result += (v * v); - } - _output.SetOutput({ result }); - } - - template <typename ValueType> - void L2NormSquaredNode<ValueType>::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode<L2NormSquaredNode<ValueType>>(newPortElements); - transformer.MapNodeOutput(output, newNode->output); - } - - template <typename ValueType> - bool L2NormSquaredNode<ValueType>::Refine(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - - auto squareInputNode = transformer.AddNode<UnaryOperationNode<ValueType>>(newPortElements, emitters::UnaryOperationType::square); - auto sumNode = transformer.AddNode<SumNode<ValueType>>(squareInputNode->output); - - transformer.MapNodeOutput(output, sumNode->output); - return true; - } - - template <typename ValueType> - void L2NormSquaredNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - } - - template <typename ValueType> - void L2NormSquaredNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/LinearPredictorNode.tcc b/libraries/nodes/tcc/LinearPredictorNode.tcc deleted
file mode 100644 index 477cb67a6..000000000 --- a/libraries/nodes/tcc/LinearPredictorNode.tcc +++ /dev/null @@ -1,94 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: LinearPredictorNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - LinearPredictorNode::LinearPredictorNode() : - Node({ &_input }, { &_output, &_weightedElements }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 1), - _weightedElements(this, weightedElementsPortName, 0) - { - } - - template - LinearPredictorNode::LinearPredictorNode(const model::OutputPort& input, const predictors::LinearPredictor& predictor) : - Node({ &_input }, { &_output, &_weightedElements }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, 1), - _weightedElements(this, weightedElementsPortName, input.Size()), - _predictor(predictor) - { - if (input.Size() != predictor.Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "LinearPredictorNode: input size must match the predictor size"); - } - } - - template - void LinearPredictorNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["weightedElements"] << _weightedElements; - archiver["predictor"] << _predictor; - } - - template - void LinearPredictorNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver["weightedElements"] >> _weightedElements; - archiver["predictor"] >> _predictor; - } - - template - void LinearPredictorNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode(newPortElements, _predictor); - transformer.MapNodeOutput(output, newNode->output); - transformer.MapNodeOutput(weightedElements, newNode->weightedElements); - } - - template - bool LinearPredictorNode::Refine(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - - auto weightsNode = transformer.AddNode>(_predictor.GetWeights().ToArray()); - auto dotProductNode = transformer.AddNode>(weightsNode->output, newPortElements); - auto coordinatewiseMultiplyNode = transformer.AddNode>(weightsNode->output, newPortElements, emitters::BinaryOperationType::coordinatewiseMultiply); - auto biasNode = transformer.AddNode>(_predictor.GetBias()); - auto addNode = transformer.AddNode>(dotProductNode->output, biasNode->output, emitters::BinaryOperationType::add); - - transformer.MapNodeOutput(output, addNode->output); - transformer.MapNodeOutput(weightedElements, coordinatewiseMultiplyNode->output); - return true; - } - - template - void LinearPredictorNode::Compute() const - { - using DataVectorType = typename LinearPredictorType::DataVectorType; - auto inputDataVector = DataVectorType(_input.GetValue()); - _output.SetOutput({ _predictor.Predict(inputDataVector) }); - _weightedElements.SetOutput(_predictor.GetWeightedElements(inputDataVector).ToArray()); - } - - template - LinearPredictorNode* AddNodeToModelTransformer(const model::PortElements& input, const predictors::LinearPredictor& predictor, 
model::ModelTransformer& transformer) - { - return transformer.AddNode<LinearPredictorNode<ElementType>>(input, predictor); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/MatrixVectorProductNode.tcc b/libraries/nodes/tcc/MatrixVectorProductNode.tcc deleted file mode 100644 index 68259a193..000000000 --- a/libraries/nodes/tcc/MatrixVectorProductNode.tcc +++ /dev/null @@ -1,119 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MatrixVectorProductNode.tcc (nodes) -// Authors: Suresh Iyengar -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template <typename ValueType, math::MatrixLayout layout> - MatrixVectorProductNode<ValueType, layout>::MatrixVectorProductNode() : - Node({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 1), - _w(0, 0) - { - } - - template <typename ValueType, math::MatrixLayout layout> - MatrixVectorProductNode<ValueType, layout>::MatrixVectorProductNode(const model::OutputPort<ValueType>& input, const math::Matrix<ValueType, layout>& w) : - Node({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, w.NumRows()), - _w(w) - { - if (input.Size() != w.NumColumns()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "MatrixVectorProductNode: input size must match the number of columns in the 'w' matrix"); - } - } - - template <typename ValueType, math::MatrixLayout layout> - void MatrixVectorProductNode<ValueType, layout>::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - - archiver["w_rows"] << _w.NumRows(); - archiver["w_columns"] << _w.NumColumns(); - std::vector<ValueType> temp; - temp.assign(_w.GetConstDataPointer(), _w.GetConstDataPointer() + (size_t)(_w.NumRows() * _w.NumColumns())); - archiver["w"] << temp; - - archiver[defaultInputPortName] << _input; - archiver[defaultOutputPortName] << _output; - } - - template <typename ValueType, math::MatrixLayout layout> - void MatrixVectorProductNode<ValueType, layout>::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - - size_t w_rows = 0; - size_t w_columns = 0; - archiver["w_rows"] >> w_rows; - archiver["w_columns"] >> w_columns; - std::vector<ValueType> temp; - archiver["w"] >> temp; - _w = math::Matrix<ValueType, layout>(w_rows, w_columns, temp); - - archiver[defaultInputPortName] >> _input; - archiver[defaultOutputPortName] >> _output; - } - - template <typename ValueType, math::MatrixLayout layout> - void MatrixVectorProductNode<ValueType, layout>::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode<MatrixVectorProductNode<ValueType, layout>>(newPortElements, _w); - transformer.MapNodeOutput(output, newNode->output); - } - - template <typename ValueType, math::MatrixLayout layout> - bool MatrixVectorProductNode<ValueType, layout>::Refine(model::ModelTransformer& transformer) const - { - const auto& newInput = transformer.GetCorrespondingInputs(_input); - - // Make sure we have a RowMatrix (because that's what MatrixVectorMultiplyNode wants) - math::RowMatrix<ValueType> projectionMatrix(_w); - auto projectionMatrixValue = projectionMatrix.ToArray(); - auto projectionMatrixNode = transformer.AddNode<ConstantNode<ValueType>>(projectionMatrixValue); - auto m = projectionMatrix.NumRows(); - auto n = projectionMatrix.NumColumns(); - auto matrixStride = projectionMatrix.GetIncrement(); - if (matrixStride == 0 || matrixStride < n) // a row-major stride must cover a full row of n columns - { - throw utilities::InputException(utilities::InputExceptionErrors::badData, "Matrix has an invalid stride"); - } - auto matrixMultiplyNode = transformer.AddNode<MatrixVectorMultiplyNode<ValueType>>(projectionMatrixNode->output, m, n, matrixStride, newInput); - transformer.MapNodeOutput(output, matrixMultiplyNode->output); - 
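// The refined sub-model now computes w * input entirely with generic primitives: a ConstantNode
// holding the matrix data feeds a GEMV-style MatrixVectorMultiplyNode, so downstream compilation
// no longer needs to know about MatrixVectorProductNode at all.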
return true; - } - - template - void MatrixVectorProductNode::Compute() const - { - math::ColumnVector input(_input.Size()); - for (size_t index = 0; index < _input.Size(); ++index) - { - input[index] = _input[index]; - } - - math::ColumnVector result(_w.NumRows()); - - // result = _w * data - math::MultiplyScaleAddUpdate(static_cast(1), _w, input, static_cast(0), result); - - _output.SetOutput(result.ToArray()); - } - - template - MatrixVectorProductNode* AddNodeToModelTransformer(const model::PortElements& input, math::ConstMatrixReference w, model::ModelTransformer& transformer) - { - return transformer.AddNode(input, w); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/MovingAverageNode.tcc b/libraries/nodes/tcc/MovingAverageNode.tcc deleted file mode 100644 index 00e8b0333..000000000 --- a/libraries/nodes/tcc/MovingAverageNode.tcc +++ /dev/null @@ -1,102 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MovingAverageNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - MovingAverageNode::MovingAverageNode() : - Node({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0), - _windowSize(0) - { - } - - template - MovingAverageNode::MovingAverageNode(const model::OutputPort& input, size_t windowSize) : - Node({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, _input.Size()), - _windowSize(windowSize) - { - auto dimension = _input.Size(); - for (size_t index = 0; index < _windowSize; ++index) - { - _samples.push_back(std::vector(dimension)); - } - _runningSum = std::vector(dimension); - } - - template - void MovingAverageNode::Compute() const - { - auto inputSample = _input.GetValue(); - auto lastBufferedSample = _samples[0]; - _samples.push_back(inputSample); - _samples.erase(_samples.begin()); - - std::vector result(_input.Size()); - for (size_t index = 0; index < inputSample.size(); ++index) - { - _runningSum[index] += (inputSample[index] - lastBufferedSample[index]); - result[index] = _runningSum[index] / _windowSize; - } - _output.SetOutput(result); - }; - - template - void MovingAverageNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, _windowSize); - transformer.MapNodeOutput(output, newNode->output); - } - - template - bool MovingAverageNode::Refine(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto delayNode = transformer.AddNode>(newPortElements, _windowSize); - auto subtractNode = transformer.AddNode>(newPortElements, delayNode->output, emitters::BinaryOperationType::subtract); - auto accumNode = transformer.AddNode>(subtractNode->output); - std::vector literalN(newPortElements.Size(), (ValueType)_windowSize); - auto constNode = transformer.AddNode>(literalN); - auto divideNode = transformer.AddNode>(accumNode->output, constNode->output, emitters::BinaryOperationType::coordinatewiseDivide); - transformer.MapNodeOutput(output, divideNode->output); - return true; - } - - template - void MovingAverageNode::WriteToArchive(utilities::Archiver& archiver) const - 
{ - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["windowSize"] << _windowSize; - } - - template - void MovingAverageNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver["windowSize"] >> _windowSize; - - auto dimension = _input.Size(); - _samples.clear(); - _samples.reserve(_windowSize); - for (size_t index = 0; index < _windowSize; ++index) - { - _samples.push_back(std::vector(dimension)); - } - _runningSum = std::vector(dimension); - _output.SetSize(dimension); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/MovingVarianceNode.tcc b/libraries/nodes/tcc/MovingVarianceNode.tcc deleted file mode 100644 index 69694bfaf..000000000 --- a/libraries/nodes/tcc/MovingVarianceNode.tcc +++ /dev/null @@ -1,90 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MovingVarianceNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - MovingVarianceNode::MovingVarianceNode() : - Node({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0), - _windowSize(0) - { - } - - template - MovingVarianceNode::MovingVarianceNode(const model::OutputPort& input, size_t windowSize) : - Node({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, _input.Size()), - _windowSize(windowSize) - { - auto dimension = _input.Size(); - for (size_t index = 0; index < _windowSize; ++index) - { - _samples.push_back(std::vector(dimension)); - } - _runningSum = std::vector(dimension); - _runningSquaredSum = std::vector(dimension); - } - - template - void MovingVarianceNode::Compute() const - { - static auto squared = [](const ValueType& x) { return x * x; }; - - auto inputSample = _input.GetValue(); - auto lastBufferedSample = _samples[0]; - _samples.push_back(inputSample); - _samples.erase(_samples.begin()); - - std::vector result(_input.Size()); - for (size_t index = 0; index < inputSample.size(); ++index) - { - _runningSum[index] += (inputSample[index] - lastBufferedSample[index]); - _runningSquaredSum[index] += squared(inputSample[index]) - squared(lastBufferedSample[index]); - result[index] = (_runningSquaredSum[index] - (squared(_runningSum[index]) / _windowSize)) / _windowSize; - } - _output.SetOutput(result); - }; - - template - void MovingVarianceNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, _windowSize); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void MovingVarianceNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["windowSize"] << _windowSize; - } - - template - void MovingVarianceNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver["windowSize"] >> _windowSize; - - auto dimension = _input.Size(); - _samples.clear(); - _samples.reserve(_windowSize); - std::generate_n(std::back_inserter(_samples), _windowSize, [dimension] { return 
std::vector<ValueType>(dimension); }); - _runningSum = std::vector<ValueType>(dimension); - _runningSquaredSum = std::vector<ValueType>(dimension); - _output.SetSize(dimension); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/MultiplexerNode.tcc b/libraries/nodes/tcc/MultiplexerNode.tcc deleted file mode 100644 index f181fd1f7..000000000 --- a/libraries/nodes/tcc/MultiplexerNode.tcc +++ /dev/null @@ -1,139 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MultiplexerNode.tcc (nodes) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template <typename ValueType, typename SelectorType> - MultiplexerNode<ValueType, SelectorType>::MultiplexerNode() : - CompilableNode({ &_elements, &_selector }, { &_output }), - _elements(this, {}, elementsPortName), - _selector(this, {}, selectorPortName), - _output(this, defaultOutputPortName, 1) - { - } - - template <typename ValueType, typename SelectorType> - MultiplexerNode<ValueType, SelectorType>::MultiplexerNode(const model::OutputPort<ValueType>& input, const model::OutputPort<SelectorType>& selector) : - CompilableNode({ &_elements, &_selector }, { &_output }), - _elements(this, input, elementsPortName), - _selector(this, selector, selectorPortName), - _output(this, defaultOutputPortName, 1) - { - if (selector.Size() != 1) - { - throw ell::utilities::Exception("Error: Condition must be 1-D signal"); - } - } - - template <typename ValueType, typename SelectorType> - void MultiplexerNode<ValueType, SelectorType>::Compute() const - { - int index = static_cast<int>(_selector[0]); - _output.SetOutput({ _elements[index] }); - } - - template <typename ValueType, typename SelectorType> - void MultiplexerNode<ValueType, SelectorType>::Copy(model::ModelTransformer& transformer) const - { - const auto& newElements = transformer.GetCorrespondingInputs(_elements); - const auto& newSelector = transformer.GetCorrespondingInputs(_selector); - auto newNode = transformer.AddNode<MultiplexerNode<ValueType, SelectorType>>(newElements, newSelector); - transformer.MapNodeOutput(output, newNode->output); - } - - template <typename ValueType, typename SelectorType> - void MultiplexerNode<ValueType, SelectorType>::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - if (std::is_same<SelectorType, bool>()) - { - CompileMultiplexerBinary(compiler, function); - } - else if (std::is_same<SelectorType, int>()) - { - CompileUnrolled(compiler, function); - } - else - { - throw emitters::EmitterException(emitters::EmitterError::valueTypeNotSupported, "Multiplexer node selectors must be bool or int"); - } - } - - template <typename ValueType, typename SelectorType> - void MultiplexerNode<ValueType, SelectorType>::CompileMultiplexerBinary(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - VerifyIsScalar(selector); - VerifyIsScalar(output); - - emitters::LLVMValue pSelector = compiler.EnsurePortEmitted(selector); - emitters::LLVMValue pSelectorVal = function.Load(pSelector); - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - auto lVal = elements.GetInputElement(0); // lval is selected if the result of the "if" comparison is NON-zero - auto rVal = elements.GetInputElement(1); - auto pLMergeableSrc = compiler.GetMergeableNodeRegion(lVal); - auto pRMergeableSrc = compiler.GetMergeableNodeRegion(rVal); - - function.If(emitters::TypedComparison::equals, pSelectorVal, function.Literal(0), [pLMergeableSrc, pResult, &compiler, this](emitters::IRFunctionEmitter& function) { - if (pLMergeableSrc != nullptr) - { - function.MergeRegion(pLMergeableSrc); - } - function.Store(pResult, compiler.LoadPortElementVariable(elements.GetInputElement(0))); - }) - .Else([pRMergeableSrc, pResult, &compiler, this](emitters::IRFunctionEmitter& function) { - if (pRMergeableSrc != nullptr) - { - 
function.MergeRegion(pRMergeableSrc); - } - function.Store(pResult, compiler.LoadPortElementVariable(elements.GetInputElement(1))); - }); - - auto pSelectorNode = selector.GetParentNodes()[0]; - if (HasSingleDescendant(*pSelectorNode)) - { - compiler.TryMergeNodeRegions(*pSelectorNode, *this); - } - } - - template - void MultiplexerNode::CompileUnrolled(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - VerifyIsScalar(selector); - VerifyIsScalar(output); - auto numElements = elements.Size(); - - emitters::LLVMValue pSelector = compiler.EnsurePortEmitted(selector); - auto pSelectorVal = function.Load(pSelector); - emitters::LLVMValue result = compiler.EnsurePortEmitted(output); - for (size_t index = 0; index < numElements; ++index) - { - function.If(emitters::TypedComparison::equals, function.Literal((int)index), pSelectorVal, [index, result, &compiler, this](emitters::IRFunctionEmitter& function) { - emitters::LLVMValue val = compiler.LoadPortElementVariable(elements.GetInputElement(index)); - function.Store(result, val); - }); - } - } - - template - void MultiplexerNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver["elements"] << _elements; - archiver["selector"] << _selector; - } - - template - void MultiplexerNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver["elements"] >> _elements; - archiver["selector"] >> _selector; - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/NeuralNetworkLayerNode.tcc b/libraries/nodes/tcc/NeuralNetworkLayerNode.tcc deleted file mode 100644 index febf0f998..000000000 --- a/libraries/nodes/tcc/NeuralNetworkLayerNode.tcc +++ /dev/null @@ -1,166 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: NeuralNetworkLayerNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - // - // NeuralNetworkLayerNodeBase - // - - template - NeuralNetworkLayerNodeBase::NeuralNetworkLayerNodeBase() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0) - { - _parameters.includePaddingInInputData = true; - } - - template - NeuralNetworkLayerNodeBase::NeuralNetworkLayerNodeBase(const model::OutputPort& input, const NeuralNetworkLayerNodeParameters& parameters, size_t outputSize) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, outputSize), - _parameters(parameters) - { - } - - template - void NeuralNetworkLayerNodeBase::WriteToArchive(utilities::Archiver& archiver) const - { - CompilableNode::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - } - - template - void NeuralNetworkLayerNodeBase::ReadFromArchive(utilities::Unarchiver& archiver) - { - CompilableNode::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - } - - // - // NeuralNetworkLayerNode - // - template - NeuralNetworkLayerNode::NeuralNetworkLayerNode() : - NeuralNetworkLayerNodeBase(), - _inputShape(0, 0, 0) - { - } - - template - NeuralNetworkLayerNode::NeuralNetworkLayerNode(const model::OutputPort& input, const LayerType& layer) : - NeuralNetworkLayerNodeBase(input, {}, layer.GetOutput().Size()), - 
_inputTensor(layer.GetInputShape()), - _layer(layer), - _inputShape(layer.GetInputShape()) - { - _layer.GetLayerParameters().input = _inputTensor; - - const auto& layerParameters = _layer.GetLayerParameters(); - - // Calculate input dimension parameters - size_t inputPaddingSize = layerParameters.inputPaddingParameters.paddingSize; - auto inputShape = this->GetLayer().GetInputShape(); - _inputLayout = CalculateMemoryLayout(inputPaddingSize, inputShape); - - // Calculate output dimension parameters - size_t outputPaddingSize = layerParameters.outputPaddingParameters.paddingSize; - auto outputShape = this->_layer.GetOutputShape(); - _output.SetMemoryLayout(CalculateMemoryLayout(outputPaddingSize, outputShape)); - } - - template - model::PortMemoryLayout NeuralNetworkLayerNode::CalculateMemoryLayout(size_t padding, typename predictors::neural::Layer::Shape dataBufferSize) - { - // Calculate dimension parameters - math::IntegerTriplet dataSizeArray = dataBufferSize; - model::MemoryShape stride{ { static_cast(dataSizeArray[0]), static_cast(dataSizeArray[1]), static_cast(dataSizeArray[2]) } }; - model::MemoryShape offset{ static_cast(padding), static_cast(padding), 0 }; - model::MemoryShape size({}); - size.Resize(stride.NumDimensions()); - for (int dimensionIndex = 0; dimensionIndex < offset.NumDimensions(); ++dimensionIndex) - { - if (stride[dimensionIndex] < (2 * offset[dimensionIndex])) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Data size not large enough to accommodate padding"); - } - size[dimensionIndex] = stride[dimensionIndex] - (2 * offset[dimensionIndex]); - } - - return { size, stride, offset }; - } - - template - utilities::ArchiveVersion NeuralNetworkLayerNode::GetArchiveVersion() const - { - constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v5_refined_nodes }; - - return archiveVersion; - } - - template - bool NeuralNetworkLayerNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const - { - constexpr utilities::ArchiveVersion archiveVersion = { utilities::ArchiveVersionNumbers::v5_refined_nodes }; - - return version >= archiveVersion; - } - - template - void NeuralNetworkLayerNode::WriteToArchive(utilities::Archiver& archiver) const - { - NeuralNetworkLayerNodeBase::WriteToArchive(archiver); - archiver["inputLayout"] << _inputLayout; - archiver["outputLayout"] << GetOutputMemoryLayout(); - - std::vector inputShape = _inputShape; - archiver["inputShape"] << inputShape; - - archiver["layer"] << _layer; - } - - template - void NeuralNetworkLayerNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - NeuralNetworkLayerNodeBase::ReadFromArchive(archiver); - archiver["inputLayout"] >> _inputLayout; - model::PortMemoryLayout outputLayout; - archiver["outputLayout"] >> outputLayout; - _output.SetMemoryLayout(outputLayout); - - std::vector inputShape; - archiver["inputShape"] >> inputShape; - _inputShape = math::TensorShape{ inputShape }; - - _inputTensor = typename LayerType::TensorType(_inputShape); - _layer.GetLayerParameters().input = _inputTensor; - archiver["layer"] >> _layer; - } - - template - void NeuralNetworkLayerNode::Compute() const - { - auto inputVector = _input.GetValue(); - auto inputTensor = typename LayerType::ConstTensorReferenceType{ inputVector.data(), _inputTensor.GetShape() }; - _inputTensor.CopyFrom(inputTensor); - _layer.Compute(); - const auto& outputTensor = _layer.GetOutput(); - _output.SetOutput(outputTensor.ToArray()); - } - - template - typename 
LayerType::LayerParameters GetLayerNodeParameters(const typename LayerType::TensorType& inputTensor, const typename LayerType::LayerParameters& layerParameters) - { - return { inputTensor, layerParameters.inputPaddingParameters, layerParameters.outputShape, layerParameters.outputPaddingParameters }; - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/NeuralNetworkPredictorNode.tcc b/libraries/nodes/tcc/NeuralNetworkPredictorNode.tcc deleted file mode 100644 index c1253e04c..000000000 --- a/libraries/nodes/tcc/NeuralNetworkPredictorNode.tcc +++ /dev/null @@ -1,98 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: NeuralNetworkPredictorNode.tcc (nodes) -// Authors: Chuck Jacobs, Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - // - // Helper functions - // - namespace - { - template - NeuralNetworkLayerNodeBase* TryAddLayerNode(model::ModelTransformer& transformer, predictors::neural::Layer& layer, const model::OutputPort& layerInputs, const typename NeuralNetworkPredictorNode::NetworkCompileOptions& options, typename NeuralNetworkPredictorNode::NetworkCompileState& state) - { - auto typedLayer = dynamic_cast(&layer); - if (typedLayer != nullptr) - { - return transformer.AddNode(layerInputs, *typedLayer); - } - return nullptr; - } - - template - NeuralNetworkLayerNodeBase* TryAddLayerNodeWithTwoInputs(model::ModelTransformer& transformer, predictors::neural::Layer& layer, const model::OutputPort& layerInputs, const model::OutputPort& secondInput, const typename NeuralNetworkPredictorNode::NetworkCompileOptions& options, typename NeuralNetworkPredictorNode::NetworkCompileState& state) - { - auto typedLayer = dynamic_cast(&layer); - if (typedLayer != nullptr) - { - return transformer.AddNode(layerInputs, secondInput, *typedLayer); - } - return nullptr; - } - } // namespace - - template - NeuralNetworkLayerNodeBase* NeuralNetworkPredictorNode::AddLayerNode(model::ModelTransformer& transformer, predictors::neural::Layer& layer, const model::OutputPort& layerInputs, const NetworkCompileOptions& options, NetworkCompileState& state) const - { - NeuralNetworkLayerNodeBase* node = nullptr; - - if (layer.template IsA>()) - { - auto& activationLayer = layer.template As>(); - auto paf = dynamic_cast*>(activationLayer.GetActivationFunction().GetImpl()); - if (paf) - { - // Ah, then this one is special, we have to use ParametricReLUActivationLayerNode in this case. 
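// Each TryAddLayerNode<LayerType, NodeType> probe used below is a dynamic_cast dispatch; a minimal
// sketch of the pattern, with the same names as the helper defined above:
//   if (auto* typed = dynamic_cast<LayerType*>(&layer)) { return transformer.AddNode<NodeType>(layerInputs, *typed); }
//   return nullptr; // not this layer type, so the caller falls through to the next probe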
- return TryAddLayerNode, ParametricReLUActivationLayerNode>(transformer, layer, layerInputs, options, state); - } - } - - node = TryAddLayerNode, ActivationLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, BatchNormalizationLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, BiasLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, BinaryConvolutionalLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, ConvolutionalLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, FullyConnectedLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - // - // Pooling layer - // - - node = TryAddLayerNode, PoolingLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, PoolingLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, RegionDetectionLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, ScalingLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - node = TryAddLayerNode, SoftmaxLayerNode>(transformer, layer, layerInputs, options, state); - if (node != nullptr) return node; - - auto name = layer.GetRuntimeTypeName(); - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Unknown layer type in refine: " + name); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/ReceptiveFieldMatrixNode.tcc b/libraries/nodes/tcc/ReceptiveFieldMatrixNode.tcc deleted file mode 100644 index 6e96c131b..000000000 --- a/libraries/nodes/tcc/ReceptiveFieldMatrixNode.tcc +++ /dev/null @@ -1,415 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ReceptiveFieldMatrixNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include - -#include - -#include -#include -#include - -namespace ell -{ -namespace nodes -{ - namespace - { - // - // Functions - // - - // Note: this function is inline to suppress a compiler warning about it being unneeded - inline emitters::LLVMValue GetValueFromVolume(emitters::IRFunctionEmitter& function, - emitters::LLVMValue inputVolume, - const model::PortMemoryLayout& inputLayout, - std::array dataOrder, - emitters::IRLocalScalar valueRow, - emitters::IRLocalScalar valueColumn, - emitters::IRLocalScalar valueChannel) - { - const auto rowStride = inputLayout.GetExtent(0); - const auto columnStride = inputLayout.GetExtent(1); - const auto channelStride = inputLayout.GetExtent(2); - - auto index = function.LocalScalar(); - if (dataOrder == std::array({ 0, 1, 2 })) - { - // row, column, channel order - index = valueRow * (columnStride * channelStride) + (valueColumn * channelStride) + valueChannel; - } - else - { - // channel, row, column order - index = valueChannel * (rowStride * columnStride) + (valueRow * columnStride) + valueColumn; - } - - return 
function.ValueAt(inputVolume, index); - } - - template - emitters::LLVMValue GetValueFromPaddedVolume(emitters::IRFunctionEmitter& function, - emitters::LLVMValue inputVolume, - const model::PortMemoryLayout& inputLayout, - int convPadding, - std::array dataOrder, - emitters::IRLocalScalar inputRow, - emitters::IRLocalScalar inputColumn, - emitters::IRLocalScalar inputChannel) - { - const int inputHeight = inputLayout.GetActiveSize(0); - const int inputWidth = inputLayout.GetActiveSize(1); - const int inputDepth = inputLayout.GetActiveSize(2); - const int inputPadding = inputLayout.GetOffset(0); // a proxy for the padding - - const int extraPaddingVal = convPadding - inputPadding; // amount by which the convolution's desired padding exceeds input's - auto extraPadding = function.LocalScalar(extraPaddingVal); - if (extraPaddingVal > 0) // known at compile-time - { - auto valueRow = inputRow - extraPadding; - auto valueColumn = inputColumn - extraPadding; - - auto rowBad = (valueRow < 0) || (valueRow >= inputHeight); - auto colBad = (valueColumn < 0) || (valueColumn >= inputWidth); - auto outOfBounds = rowBad || colBad; - - emitters::LLVMValue returnValue = function.Variable(emitters::GetVariableType(), "returnVal"); - function.If(outOfBounds, [=](emitters::IRFunctionEmitter& function) { - function.StoreZero(returnValue); - }) - .Else([=](emitters::IRFunctionEmitter& function) { - // channel, row, col order - auto index1 = valueRow * (inputWidth * inputDepth); - auto index2 = valueColumn * inputDepth; - auto index = index1 + index2 + inputChannel; - auto val = function.ValueAt(inputVolume, index); - - // Note: we can't return from within an if/else block, so we store the value in a local variable - function.Store(returnValue, val); - }); - - return function.Load(returnValue); - } - - if (extraPaddingVal != 0) // negative - { - inputRow = inputRow + extraPadding; - inputColumn = inputColumn + extraPadding; - } - return GetValueFromVolume(function, inputVolume, inputLayout, dataOrder, inputRow, inputColumn, inputChannel); - } - - template - void EmitReceptiveFieldToColumns(emitters::IRFunctionEmitter& function, - emitters::LLVMValue inputVolume, - const model::PortMemoryLayout& inputLayout, - int filterWidth, - int stride, - int convPadding, // amount of padding to assume around the image -- determines output size - std::array dataOrder, - int outputWidth, - int outputHeight, - emitters::LLVMValue outputMatrix) - { - // Model parameters - const auto inputHeight = inputLayout.GetLogicalDimensionActiveSize(0); - const auto inputWidth = inputLayout.GetLogicalDimensionActiveSize(1); - const auto inputDepth = inputLayout.GetLogicalDimensionActiveSize(2); - const auto fieldVolumeSize = filterWidth * filterWidth * inputDepth; - const auto numOutputColumns = static_cast(outputWidth * outputHeight); - - // Input (I): d x h x w (planar) - // Output (S): (d * k * k) x (outputHeight * outputWidth) == fieldVolumeSize x outputImageSize - - // Example - // k = 3, d = 2 - // - // A B C D a b c d - // I = E F G H e f g h - // I J K L i j k l - // M N O P m n o p - // - // . . . . . A B C D E F G H I J K - // . . . . . a b c d e f g h i j k - // . . . . A B C D E F G H I J K L - // . . . . a b c d e f g h i j k l - // . . . . B C D E F G H I J K L M - // . . . . b c d e f g h i j k l m - // - // . A B C D E F G H I J K L M N O - // . a b c d e f g h i j k l m n o - // S = A B C D E F G H I J K L M N O P - // a b c d e f g h i j k l m n o p - // B C D E F G H I J K L M N O P . 
- // b c d e f g h i j k l m n o p . - // - // D E F G H I J K L M N O . . . . - // d e f g h i j k l m n o . . . . - // E F G H I J K L M N O P . . . . - // e f g h i j k l m n o p . . . . - // F G H I J K L M N O P . . . . . - // f g h i j k l m n o p . . . . . - // - // Note that the middle d=2 rows of S are the entire image, linearized: - // A B C D E F G H I J K L M N O P a b c d e f g h i j k l m n o p - - // const int extraPadding = (int)convPadding - (int)inputPadding; // extraPadding is the amount of extra padding we need to do, on top of what's in the input data - const int extraPadding = convPadding; - const bool useContiguousReshape = (dataOrder == std::array({ { 2, 0, 1 } })) && (stride == 1); // channel, row, column order, unit stride - if (useContiguousReshape) - { - // assert(inputPadding == 0 && "Input data must not be padded"); - // Points to the beginning of the input volume - emitters::LLVMValue inputPtr = function.PointerOffset(inputVolume, 0); - - // Points to the beginning of the outputMatrix - emitters::LLVMValue outputPtr = function.PointerOffset(outputMatrix, 0); - - // Unroll outer loops - for (int fy = 0; fy < filterWidth; ++fy) - { - for (int fx = 0; fx < filterWidth; ++fx) - { - // `outputRow` is the row of the output matrix to start writing to. Multiplied by `inputDepth`, because - // we're going to memcpy `inputDepth` rows at once - int outputRow = (fy * filterWidth + fx) * inputDepth; - - int outputOffset1 = inputWidth * (extraPadding - fy); // where to start writing this row in the output - int outputOffset2 = (extraPadding - fx); // where to start writing this row in the output - int inputOffset = 0; // where to start reading from for this row - if (outputOffset1 < 0) - { - inputOffset -= outputOffset1; - outputOffset1 = 0; - } - if (outputOffset2 < 0) - { - inputOffset -= outputOffset2; - outputOffset2 = 0; - } - int outputOffset = outputOffset1 + outputOffset2; - int count = (inputWidth * inputHeight * inputDepth) - inputOffset - outputOffset; - outputOffset += outputRow * numOutputColumns; - - // For this output row, copy what we need from the input image - function.MemoryCopy(inputPtr, inputOffset, outputPtr, outputOffset, count); - const int outputRowOffset = outputRow * numOutputColumns; - - // Zero out the padding areas - // BUG: explicit capture-by-ref entries are here to work around a GCC bug - function.For(inputDepth, [=, &fx, &fy, &extraPadding, &inputWidth, &inputHeight, &outputWidth, &numOutputColumns](emitters::IRFunctionEmitter& function, emitters::LLVMValue channelValue) { - auto channel = function.LocalScalar(channelValue); - auto outputDepthOffset = channel * numOutputColumns; - - // Points to the beginning of the current channel in the outputMatrix - auto outputChannelPtr = function.PointerOffset(outputMatrix, outputDepthOffset); - - uint8_t paddingValue = 0; - if (fy < extraPadding) - { - // zero out full image rows at beginning of image - int count = (extraPadding - fy) * outputWidth; - int begin = 0; - function.MemorySet(outputChannelPtr, outputRowOffset + begin, function.Literal(paddingValue), count); - } - else if (fy > extraPadding) - { - // zero out full image rows at end of image - int count = (fy - extraPadding) * outputWidth; - int begin = numOutputColumns - count; - assert(begin >= 0); - function.MemorySet(outputChannelPtr, outputRowOffset + begin, function.Literal(paddingValue), count); - } - - if (fx < extraPadding) - { - // zero out elements at beginning of each row - int count = extraPadding - fx; - // BUG: explicit 
capture-by-ref entries are here to work around a GCC bug - function.For(inputHeight, [=, &inputWidth, &outputRowOffset](emitters::IRFunctionEmitter& function, emitters::LLVMValue indexValue) { - auto index = function.LocalScalar(indexValue); - auto begin = index * inputWidth; - auto offset = begin + outputRowOffset; - function.MemorySet(outputChannelPtr, offset, function.Literal(paddingValue), count); - }); - } - else if (fx > extraPadding) - { - // zero out elements at end of each row - int count = fx - extraPadding; - // BUG: explicit capture-by-ref entries are here to work around a GCC bug - function.For(inputHeight, [=, &inputWidth, &outputRowOffset](emitters::IRFunctionEmitter& function, emitters::LLVMValue indexValue) { - auto index = function.LocalScalar(indexValue); - auto begin = ((index + 1) * inputWidth) - count; - auto offset = begin + outputRowOffset; - function.MemorySet(outputChannelPtr, offset, function.Literal(paddingValue), count); - }); - } - }); - } - } - } - else // Normal, single value-at-a-time method - { - // The outer loop iterates over all d * k * k entries in the receptive field - function.For(fieldVolumeSize, [=](emitters::IRFunctionEmitter& function, emitters::LLVMValue fValue) { - auto f = function.LocalScalar(fValue); - auto fieldChannel = function.LocalScalar(); - auto fieldColumn = function.LocalScalar(); - auto fieldRow = function.LocalScalar(); - - // TODO: use the entries of dataOrder to compute the indices - if (dataOrder == std::array({ { 0, 1, 2 } })) // row, column, channel order - { - fieldChannel = f % inputDepth; - auto fDivDepth = f / inputDepth; - fieldColumn = fDivDepth % filterWidth; - fieldRow = fDivDepth / filterWidth; - } - else // channel, row, column order - { - fieldColumn = f % filterWidth; - auto fDivColumns = f / filterWidth; - fieldRow = fDivColumns % filterWidth; - fieldChannel = fDivColumns / filterWidth; - } - - // Now for each receptive field entry, iterate over all h * w locations in the output image - function.For(outputHeight, [=, &fieldRow, &fieldColumn](emitters::IRFunctionEmitter& function, emitters::LLVMValue outputImageRowValue) { - auto outputImageRow = function.LocalScalar(outputImageRowValue); - auto inputRow = outputImageRow * stride; - function.For(outputWidth, [=, &fieldRow, &fieldColumn, &inputRow](emitters::IRFunctionEmitter& function, emitters::LLVMValue outputImageColumnValue) { - auto outputImageColumn = function.LocalScalar(outputImageColumnValue); - auto inputColumn = outputImageColumn * stride; - - // outRowOffset is the offset to the f'th row in the output S matrix - auto outRowOffset = f * (outputHeight * outputWidth); - - // outColRowOffset is the offset to the column of the S matrix where `outputImageRow` begins - auto outColRowOffset = outputImageRow * outputWidth; - // outputIndex is the index of the entry in S to write to - auto outputIndex = outRowOffset + (outColRowOffset + outputImageColumn); - - // input row and column in the input image - auto entryRow = inputRow + fieldRow; - auto entryColumn = inputColumn + fieldColumn; - auto volumeValue = GetValueFromPaddedVolume(function, inputVolume, inputLayout, extraPadding, dataOrder, entryRow, entryColumn, fieldChannel); - function.SetValueAt(outputMatrix, outputIndex, volumeValue); - }); - }); - }); - } - } - } // namespace - - // - // ReceptiveFieldMatrixNode - // - template - ReceptiveFieldMatrixNode::ReceptiveFieldMatrixNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, 
defaultOutputPortName, 0), - _filterWidth(0), - _stride(0), - _convolutionPadding(0), - _dataOrder({ { 0, 1, 2 } }), - _outputWidth(0), - _outputHeight(0) - { - } - - template - ReceptiveFieldMatrixNode::ReceptiveFieldMatrixNode(const model::OutputPort& input, const model::PortMemoryLayout& inputMemoryLayout, int filterWidth, int stride, int convolutionPadding, std::array dataOrder, int outputWidth, int outputHeight) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, model::PortMemoryLayout(model::MemoryShape{ outputWidth * outputHeight, filterWidth * filterWidth * inputMemoryLayout.GetLogicalDimensionActiveSize(2) }, model::DimensionOrder{ dataOrder })), - _inputMemoryLayout(inputMemoryLayout), - _filterWidth(filterWidth), - _stride(stride), - _convolutionPadding(convolutionPadding), - _dataOrder(dataOrder), - _outputWidth(outputWidth), - _outputHeight(outputHeight) - { - if (inputMemoryLayout.NumDimensions() != 3) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "ReceptiveFieldMatrixNode: inputMemoryLayout must have 3 dimensions"); - } - } - - template - void ReceptiveFieldMatrixNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode(newPortElements, GetInputMemoryLayout(), _filterWidth, _stride, _convolutionPadding, _dataOrder, _outputWidth, _outputHeight); - transformer.MapNodeOutput(this->output, newNode->output); - } - - template - void ReceptiveFieldMatrixNode::Compute() const - { - throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented); - } - - template - void ReceptiveFieldMatrixNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pInput = compiler.EnsurePortEmitted(this->input); - emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(this->output); - - const auto& inputLayout = this->GetInputMemoryLayout(); - assert(inputLayout.NumDimensions() == 3); - - // Re-shape input - EmitReceptiveFieldToColumns(function, pInput, inputLayout, _filterWidth, _stride, _convolutionPadding, _dataOrder, _outputWidth, _outputHeight, pOutput); - } - - template - void ReceptiveFieldMatrixNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver[defaultOutputPortName] << _output; - archiver["inputLayout"] << _inputMemoryLayout; - - archiver["filterWidth"] << _filterWidth; - archiver["stride"] << _stride; - ; - archiver["convolutionPadding"] << _convolutionPadding; - - std::vector dataOrder(_dataOrder.begin(), _dataOrder.end()); - archiver["dataOrder"] << dataOrder; - - archiver["outputWidth"] << _outputWidth; - archiver["outputHeight"] << _outputHeight; - } - - template - void ReceptiveFieldMatrixNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver[defaultOutputPortName] >> _output; - archiver["inputLayout"] >> _inputMemoryLayout; - - archiver["filterWidth"] >> _filterWidth; - archiver["stride"] >> _stride; - archiver["convolutionPadding"] >> _convolutionPadding; - - std::vector dataOrder; - archiver["dataOrder"] >> dataOrder; - std::copy(dataOrder.begin(), dataOrder.end(), _dataOrder.begin()); - - archiver["outputWidth"] >> _outputWidth; - archiver["outputHeight"] >> _outputHeight; - } -} // 
namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/ReorderDataNode.tcc b/libraries/nodes/tcc/ReorderDataNode.tcc deleted file mode 100644 index 372079d60..000000000 --- a/libraries/nodes/tcc/ReorderDataNode.tcc +++ /dev/null @@ -1,377 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ReorderDataNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include - -#include - -namespace ell -{ -namespace nodes -{ - namespace ReorderDataNodeDetail - { - using emitters::IRLocalScalar; - using model::DimensionOrder; - using model::MemoryCoordinates; - - inline MemoryCoordinates LogicalToPhysical(const MemoryCoordinates& coordinates, const DimensionOrder& order) - { - const int numDimensions = coordinates.NumDimensions(); - std::vector result(numDimensions); - for (int index = 0; index < numDimensions; ++index) - { - result[index] = coordinates[order[index]]; - } - return { result }; - } - - inline std::vector LogicalToPhysical(const std::vector& coordinates, - const DimensionOrder& order) - { - const int numDimensions = order.NumDimensions(); - // copying coordinates[0] just because IRLocalScalar doesn't have a default c'tor - std::vector result(numDimensions, coordinates[0]); - for (int index = 0; index < numDimensions; ++index) - { - result[index] = coordinates[order[index]]; - } - return result; - } - - inline MemoryCoordinates PhysicalToLogical(const MemoryCoordinates& coordinates, const DimensionOrder& order) - { - const int numDimensions = coordinates.NumDimensions(); - std::vector result(numDimensions); - for (int index = 0; index < numDimensions; ++index) - { - result[order[index]] = coordinates[index]; - } - return { result }; - } - - inline std::vector PhysicalToLogical(const std::vector& coordinates, - const DimensionOrder& order) - { - const int numDimensions = order.NumDimensions(); - // copying coordinates[0] just because IRLocalScalar doesn't have a default c'tor - std::vector result(numDimensions, coordinates[0]); - for (int index = 0; index < numDimensions; ++index) - { - result[order[index]] = coordinates[index]; - } - return result; - } - } // namespace ReorderDataNodeDetail - - // - // ReorderDataNode - // - template - ReorderDataNode::ReorderDataNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0) - {} - - // - // Without reordering ("reshape" / slicing) - // - template - ReorderDataNode::ReorderDataNode(const model::OutputPort& input, - const model::PortMemoryLayout& outputMemoryLayout, - ValueType paddingValue) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, outputMemoryLayout), - _paddingValue(paddingValue) - { - _inputMemoryLayout = _input.GetMemoryLayout(); - if (_inputMemoryLayout.NumDimensions() != outputMemoryLayout.NumDimensions()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - "Error: input and output layouts must have same dimension"); - } - } - - template - ReorderDataNode::ReorderDataNode(const model::OutputPort& input, - const model::PortMemoryLayout& inputMemoryLayout, - const model::PortMemoryLayout& outputMemoryLayout, - ValueType paddingValue) : - CompilableNode({ &_input }, { &_output }), - 
_input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, outputMemoryLayout), - _inputMemoryLayout(inputMemoryLayout), - _paddingValue(paddingValue) - { - if (inputMemoryLayout.NumDimensions() != outputMemoryLayout.NumDimensions()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - "Error: input and output layouts must have same dimension"); - } - } - - // - // With reordering ("reshape" / slicing, followed by transpose / dimension reordering) - // - template - ReorderDataNode::ReorderDataNode(const model::OutputPort& input, - const model::DimensionOrder& order) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, _input.GetMemoryLayout().ReorderedCopy(order)) - { - _inputMemoryLayout = _input.GetMemoryLayout(); - if (_inputMemoryLayout.NumDimensions() != order.NumDimensions()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - "Error: input and output layouts must have same dimension"); - } - } - - template - ReorderDataNode::ReorderDataNode(const model::OutputPort& input, - const model::PortMemoryLayout& outputMemoryLayout, - const model::DimensionOrder& order, - ValueType paddingValue) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, outputMemoryLayout.ReorderedCopy(order)), - _paddingValue(paddingValue) - { - _inputMemoryLayout = _input.GetMemoryLayout(); - if (_inputMemoryLayout.NumDimensions() != outputMemoryLayout.NumDimensions()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - "Error: input and output layouts must have same dimension"); - } - } - - template - ReorderDataNode::ReorderDataNode(const model::OutputPort& input, - const model::PortMemoryLayout& inputMemoryLayout, - const model::PortMemoryLayout& outputMemoryLayout, - const model::DimensionOrder& order, - ValueType paddingValue) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, outputMemoryLayout.ReorderedCopy(order)), - _inputMemoryLayout(inputMemoryLayout), - _paddingValue(paddingValue) - { - if (inputMemoryLayout.NumDimensions() != outputMemoryLayout.NumDimensions()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - "Error: input and output layouts must have same dimension"); - } - } - - template - model::MemoryCoordinates ReorderDataNode::ReorderOutputToInputLocation( - model::MemoryCoordinates physicalOutputCoordinates) const - { - const auto inputDimensionOrder = GetInputMemoryLayout().GetLogicalDimensionOrder(); - const auto outputDimensionOrder = GetOutputMemoryLayout().GetLogicalDimensionOrder(); - - auto logicalCoordinates = - ReorderDataNodeDetail::PhysicalToLogical(physicalOutputCoordinates, outputDimensionOrder); - auto physicalInputCoordinates = - ReorderDataNodeDetail::LogicalToPhysical(logicalCoordinates, inputDimensionOrder); - return physicalInputCoordinates; - } - - // TODO: for each dimension, loop over minimum of input and output interval. 
Then we don't have to check if the value is out-of-bounds - template - std::vector ReorderDataNode::ReorderOutputToInputLocation( - std::vector physicalOutputCoordinates) const - { - const auto inputDimensionOrder = GetInputMemoryLayout().GetLogicalDimensionOrder(); - const auto outputDimensionOrder = GetOutputMemoryLayout().GetLogicalDimensionOrder(); - - auto logicalCoordinates = - ReorderDataNodeDetail::PhysicalToLogical(physicalOutputCoordinates, outputDimensionOrder); - auto physicalInputCoordinates = - ReorderDataNodeDetail::LogicalToPhysical(logicalCoordinates, inputDimensionOrder); - return physicalInputCoordinates; - } - - template - void ReorderDataNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode(newPortElements, - _inputMemoryLayout, - _output.GetMemoryLayout(), - _paddingValue); - transformer.MapNodeOutput(this->output, newNode->output); - } - - template - void ReorderDataNode::ComputeDimensionLoop(const model::PortMemoryLayout& inputMemoryLayout, - const model::PortMemoryLayout& outputMemoryLayout, - int dimension, - std::vector& coordinates, - std::vector& output) const - { - if (dimension == inputMemoryLayout.NumDimensions() - 1) // last dimension - { - for (int index = 0; index < outputMemoryLayout.GetActiveSize(dimension); ++index) - { - coordinates[dimension] = index; - - auto inputLocation = ReorderOutputToInputLocation(coordinates); - auto inputIndex = inputMemoryLayout.GetEntryOffset(inputLocation); - auto outputIndex = outputMemoryLayout.GetEntryOffset(coordinates); - output[outputIndex] = _input[inputIndex]; - } - } - else - { - for (int index = 0; index < outputMemoryLayout.GetActiveSize(dimension); ++index) - { - coordinates[dimension] = index; - ComputeDimensionLoop(inputMemoryLayout, outputMemoryLayout, dimension + 1, coordinates, output); - } - } - } - - // TODO: for each dimension, loop over minimum of input and output interval. 
Then we don't have to check if the value is out-of-bounds - template - void ReorderDataNode::Compute() const - { - const auto inputMemoryLayout = GetInputMemoryLayout(); - const auto outputMemoryLayout = _output.GetMemoryLayout(); - if (outputMemoryLayout == inputMemoryLayout) - { - _output.SetOutput(_input.GetValue()); - } - else - { - const int numDimensions = inputMemoryLayout.NumDimensions(); - const int outputSize = outputMemoryLayout.GetMemorySize(); - if (numDimensions != outputMemoryLayout.NumDimensions()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - "Error: input and output layouts must have same dimension"); - } - - std::vector output(outputSize, _paddingValue); // initialize to padding value - std::vector coordinates(numDimensions); - ComputeDimensionLoop(inputMemoryLayout, outputMemoryLayout, 0, coordinates, output); - _output.SetOutput(output); - } - } - - template - void ReorderDataNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - assert(this->input.Size() > 1); - auto input = function.LocalArray(compiler.EnsurePortEmitted(this->input)); - auto output = function.LocalArray(compiler.EnsurePortEmitted(this->output, _paddingValue)); - - const auto inputMemoryLayout = GetInputMemoryLayout(); - const auto outputMemoryLayout = GetOutputMemoryLayout(); - - const int numDimensions = inputMemoryLayout.NumDimensions(); - const int outputSize = outputMemoryLayout.GetMemorySize(); - UNUSED(outputSize); - - std::vector ranges; - for (int dimensionIndex = 0; dimensionIndex < numDimensions; ++dimensionIndex) - { - ranges.push_back({ 0, outputMemoryLayout.GetActiveSize(dimensionIndex) }); - } - - function.For(ranges, - [input, - output, - inputMemoryLayout, - outputMemoryLayout, - this](emitters::IRFunctionEmitter& function, std::vector indices) { - auto inputLocation = ReorderOutputToInputLocation(indices); - auto inputIndex = model::EmitGetEntryOffset(function, inputLocation, inputMemoryLayout); - auto outputIndex = model::EmitGetEntryOffset(function, indices, outputMemoryLayout); - output[outputIndex] = input[inputIndex]; - }); - } - - template - ell::utilities::ArchiveVersion ReorderDataNode::GetArchiveVersion() const - { - constexpr utilities::ArchiveVersion currentArchiveVersion = { - utilities::ArchiveVersionNumbers::v8_port_memory_layout - }; - return std::max(currentArchiveVersion, CompilableNode::GetArchiveVersion()); - } - - template - bool ReorderDataNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const - { - return CompilableNode::CanReadArchiveVersion(version); - } - - template - void ReorderDataNode::WriteToArchive(utilities::Archiver& archiver) const - { - CompilableNode::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["inputLayout"] << _inputMemoryLayout; - archiver["outputLayout"] << GetOutputMemoryLayout(); - archiver["paddingValue"] << _paddingValue; - } - - template - void ReorderDataNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - CompilableNode::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver["inputLayout"] >> _inputMemoryLayout; - model::PortMemoryLayout outputMemoryLayout; - if (archiver.HasNextPropertyName("outputLayout")) - { - // backward-compatability - archiver["outputLayout"] >> outputMemoryLayout; - - if (archiver.HasNextPropertyName("order")) - { - std::vector order; - archiver["order"] >> order; - outputMemoryLayout = 
model::PortMemoryLayout(outputMemoryLayout.GetActiveSize(), - outputMemoryLayout.GetExtent(), - outputMemoryLayout.GetOffset(), - outputMemoryLayout.GetCumulativeIncrement(), - order); - } - _output.SetMemoryLayout(outputMemoryLayout); - } - else - { - _output.SetMemoryLayout(_inputMemoryLayout); - if (archiver.HasNextPropertyName("order")) - { - std::vector order; - archiver["order"] >> order; - _output.SetMemoryLayout(GetOutputMemoryLayout().ReorderedCopy(order)); - } - } - - if (archiver.HasNextPropertyName("order")) - { - std::vector order; - archiver["order"] >> order; - _output.SetMemoryLayout(GetOutputMemoryLayout().ReorderedCopy(order)); - } - - archiver["paddingValue"] >> _paddingValue; - } - -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/SinkNode.tcc b/libraries/nodes/tcc/SinkNode.tcc deleted file mode 100644 index bcb74ebd1..000000000 --- a/libraries/nodes/tcc/SinkNode.tcc +++ /dev/null @@ -1,183 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SinkNode.tcc (nodes) -// Authors: Lisa Ong -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -namespace ell -{ -namespace nodes -{ - template - SinkNode::SinkNode() : - SinkNode({}, {}, model::MemoryShape{ 0 }, "", nullptr) - { - } - - // Following the pattern of OutputNode, we provide a constructor override that infers the shape from the input - template - SinkNode::SinkNode(const model::OutputPort& input, const model::OutputPort& trigger, const std::string& sinkFunctionName, SinkFunction sink) : - SinkNode(input, trigger, model::MemoryShape{ static_cast(input.Size()) }, sinkFunctionName, sink) - { - } - - template - SinkNode::SinkNode(const model::OutputPort& input, const model::OutputPort& trigger, size_t outputVectorSize, const std::string& sinkFunctionName, SinkFunction sink) : - SinkNode(input, trigger, model::MemoryShape{ static_cast(outputVectorSize) }, sinkFunctionName, sink) - { - } - - template - SinkNode::SinkNode(const model::OutputPort& input, const model::OutputPort& trigger, const model::MemoryShape& shape, const std::string& sinkFunctionName, SinkFunction sink) : - model::SinkNodeBase(_input, _trigger, _output, shape, sinkFunctionName), - _input(this, input, defaultInputPortName), - _trigger(this, trigger, triggerPortName), - _output(this, defaultOutputPortName, shape), - _sink(sink == nullptr ? 
[](const auto&) {} : sink) - { - } - - template - void SinkNode::Compute() const - { - DEBUG_THROW(_sink == nullptr, utilities::InputException(utilities::InputExceptionErrors::nullReference, "Sink function is not set")); - - if (_sink != nullptr && _trigger.GetValue(0)) - { - _sink(_input.GetValue()); - } - _output.SetOutput(_input.GetValue()); - } - - template - void SinkNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); - emitters::LLVMValue pTrigger = compiler.EnsurePortEmitted(trigger); - std::string prefixedName(compiler.GetNamespacePrefix() + "_" + GetCallbackName()); - auto& module = function.GetModule(); - auto triggerValue = function.ValueAt(pTrigger, 0); - - function.If(emitters::TypedComparison::equals, triggerValue, function.Literal(true), [prefixedName, pInput, &module, &compiler](emitters::IRFunctionEmitter& function) { - // look up our global context object - auto context = module.GlobalPointer(compiler.GetNamespacePrefix() + "_context", emitters::VariableType::Byte); - auto globalContext = function.Load(context); - - // Callback signature: void SinkFunction(void* context, ValueType* array) - const emitters::NamedVariableTypeList parameters = { { "context", emitters::VariableType::BytePointer }, - { "output", emitters::GetPointerType(emitters::GetVariableType()) } }; - module.DeclareFunction(prefixedName, emitters::VariableType::Void, parameters); - - emitters::LLVMFunction pSinkFunction = module.GetFunction(prefixedName); - function.Call(pSinkFunction, { globalContext, function.PointerOffset(pInput, function.Literal(0)) }); - }); - - // Tag the sink function as a callback that is emitted in headers - module.IncludeInCallbackInterface(prefixedName, "SinkNode"); - - // Set output values as well, useful when user code is in a non-event-driven mode - if (!IsScalar(input) && !compiler.GetCompilerOptions().unrollLoops) - { - SetOutputValuesLoop(compiler, function); - } - else - { - SetOutputValuesExpanded(compiler, function); - } - } - - template - void SinkNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newInput = transformer.GetCorrespondingInputs(_input); - const auto& newTrigger = transformer.GetCorrespondingInputs(_trigger); - auto newNode = transformer.AddNode>(newInput, newTrigger, GetShape(), GetCallbackName(), _sink); - transformer.MapNodeOutput(output, newNode->output); - } - - template - utilities::ArchiveVersion SinkNode::GetArchiveVersion() const - { - constexpr utilities::ArchiveVersion sinkNodeShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v6_sink_triggers }; - - return sinkNodeShapeArchiveVersion; - } - - template - bool SinkNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const - { - constexpr utilities::ArchiveVersion sinkNodeNoShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v0_initial }; - constexpr utilities::ArchiveVersion sinkNodeShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v6_sink_triggers }; - - return version >= sinkNodeNoShapeArchiveVersion && version <= sinkNodeShapeArchiveVersion; - } - - template - void SinkNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver[triggerPortName] << _trigger; - archiver["sinkFunctionName"] << GetCallbackName(); - archiver["shape"] << GetShape().ToVector(); - } - - template - void SinkNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - 
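// NOTE: a std::function cannot be serialized, so only the callback *name*
// round-trips through the archive below; the _sink member itself stays empty
// until the host re-binds it after loading (hence the "_sink needs to be set
// separately" comment at the end of this function).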
Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver[triggerPortName] >> _trigger; - - std::string sinkFunctionName; - archiver["sinkFunctionName"] >> sinkFunctionName; - SetCallbackName(sinkFunctionName); - - std::vector shapeVector; - archiver["shape"] >> shapeVector; - SetShape({ shapeVector }); - - // _sink needs to be set separately - } - - template - void SinkNode::SetOutputValuesLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - assert(input.Size() == output.Size()); - - // Concatenate the input ports in a similar way as OutputNodes, - // because SinkNodes are just callback-enabled OutputNodes. - auto input = function.LocalArray(compiler.EnsurePortEmitted(_input)); - auto output = function.LocalArray(compiler.EnsurePortEmitted(_output)); - // check if the output variable is null. - function.If(ell::emitters::TypedComparison::notEquals, output, function.NullPointer(output.value->getType()->getPointerElementType()->getPointerTo()), [input, output, this](emitters::IRFunctionEmitter& function) { - auto size = _input.Size(); - function.For(size, [input, output](emitters::IRFunctionEmitter& function, auto i) { - output[i] = input[i]; - }); - }); - } - - template - void SinkNode::SetOutputValuesExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - compiler.EnsurePortEmitted(input); - emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(output); - - auto numInputs = input.Size(); - assert(numInputs == output.Size()); - - for (size_t i = 0; i < numInputs; ++i) - { - // Concatenate the input ports - emitters::LLVMValue value = compiler.LoadPortElementVariable(input.GetInputElement(i)); - function.SetValueAt(pOutput, function.Literal(static_cast(i)), value); - } - } -} // namespace nodes -} // namespace ell \ No newline at end of file diff --git a/libraries/nodes/tcc/SourceNode.tcc b/libraries/nodes/tcc/SourceNode.tcc deleted file mode 100644 index 747b7557a..000000000 --- a/libraries/nodes/tcc/SourceNode.tcc +++ /dev/null @@ -1,205 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SourceNode.tcc (nodes) -// Authors: Lisa Ong -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - SourceNode::SourceNode() : - SourceNode({}, model::MemoryShape{ 0 }, "", nullptr) - { - } - - template - SourceNode::SourceNode(const model::OutputPort& input, size_t inputVectorSize, const std::string& sourceFunctionName, SourceFunction source) : - SourceNode(input, model::MemoryShape{ static_cast(inputVectorSize) }, sourceFunctionName, source) - { - } - - template - SourceNode::SourceNode(const model::OutputPort& input, const model::MemoryShape& shape, const std::string& sourceFunctionName, SourceFunction source) : - model::SourceNodeBase(_input, _output, sourceFunctionName), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, shape), - _source(source == nullptr ? 
[](auto&) { return false; } : source) - { - _bufferedSample.resize(shape.NumElements()); - } - - template - SourceNode::SourceNode(const model::OutputPort& input, const model::PortMemoryLayout& layout, const std::string& sourceFunctionName, SourceFunction source) : - model::SourceNodeBase(_input, _output, sourceFunctionName), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, layout), - _source(source == nullptr ? [](auto&) { return false; } : source) - { - _bufferedSample.resize(layout.NumElements()); - } - - template - void SourceNode::SetInput(std::vector inputValues) - { - assert(_bufferedSample.size() == inputValues.size()); - _bufferedSample = inputValues; - } - - template - void SourceNode::Compute() const - { - auto sampleTime = _input.GetValue(0); - - if (_source(_bufferedSample)) - { - // Determine if the sample time differs from the current time - auto currentTime = _input.GetValue(1); - if (currentTime > sampleTime) - { - // Interpolate _bufferedSample to match the sample time - Interpolate(currentTime, sampleTime); - } - } - - _bufferedSampleTime = sampleTime; - _output.SetOutput(_bufferedSample); - } - - template - void SourceNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); - compiler.EnsurePortEmitted(output); - auto& module = function.GetModule(); - - // Globals - emitters::Variable* pBufferedSampleTimeVar = module.Variables().AddVariable>(emitters::VariableScope::global, _bufferedSampleTime); - emitters::Variable* pBufferedSampleVar = module.Variables().AddVariable>(emitters::VariableScope::global, output.Size()); - emitters::LLVMValue pBufferedSampleTime = module.EnsureEmitted(*pBufferedSampleTimeVar); - emitters::LLVMValue pBufferedSample = module.EnsureEmitted(*pBufferedSampleVar); - emitters::LLVMValue bufferedSampleTime = function.Load(pBufferedSampleTime); - UNUSED(bufferedSampleTime); - - // Callback function - const emitters::NamedVariableTypeList parameters = { { "context", emitters::VariableType::BytePointer }, - { "input", emitters::GetPointerType(emitters::GetVariableType()) } }; - std::string prefixedName(compiler.GetNamespacePrefix() + "_" + GetCallbackName()); - module.DeclareFunction(prefixedName, emitters::GetVariableType(), parameters); - module.IncludeInCallbackInterface(prefixedName, "SourceNode"); - - emitters::LLVMFunction pSamplingFunction = module.GetFunction(prefixedName); - - // look up our global context object - auto context = module.GlobalPointer(compiler.GetNamespacePrefix() + "_context", emitters::VariableType::Byte); - auto globalContext = function.Load(context); - - // Locals - auto sampleTime = function.ValueAt(pInput, function.Literal(0)); - - // Invoke the callback and optionally interpolate. 
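// NOTE (illustrative sketch, not part of this patch; names are hypothetical):
// the callback declared above has the C signature
//     bool <namespacePrefix>_<CallbackName>(void* context, ValueType* buffer);
// so a host feeding the compiled model might implement it roughly as:
//
//     extern "C" bool model_DataCallback(void* context, double* buffer)
//     {
//         auto* reader = static_cast<AudioReader*>(context); // context pointer registered by the host
//         return reader->TryReadFrame(buffer);               // true => buffer now holds a fresh sample
//     }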
- function.Call(pSamplingFunction, { globalContext, function.PointerOffset(pBufferedSample, 0) }); - - // TODO: Interpolate if there is a sample, and currentTime > sampleTime - // Note: currentTime can be retrieved via currentTime = function.ValueAt(pInput, function.Literal(1)); - - // Set sample values to the output - if (!IsScalar(output) && !compiler.GetCompilerOptions().unrollLoops) - { - SetOutputValuesLoop(compiler, function, pBufferedSample); - } - else - { - SetOutputValuesExpanded(compiler, function, pBufferedSample); - } - - // Update the cached sample time - function.Store(pBufferedSampleTime, sampleTime); - } - - template - void SourceNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, GetShape(), GetCallbackName(), _source); - transformer.MapNodeOutput(output, newNode->output); - } - - template - utilities::ArchiveVersion SourceNode::GetArchiveVersion() const - { - constexpr utilities::ArchiveVersion sourceNodeShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v4_source_sink_shapes }; - - return sourceNodeShapeArchiveVersion; - } - - template - bool SourceNode::CanReadArchiveVersion(const utilities::ArchiveVersion& version) const - { - constexpr utilities::ArchiveVersion sourceNodeNoShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v0_initial }; - constexpr utilities::ArchiveVersion sourceNodeShapeArchiveVersion = { utilities::ArchiveVersionNumbers::v4_source_sink_shapes }; - - return version >= sourceNodeNoShapeArchiveVersion && version <= sourceNodeShapeArchiveVersion; - } - - template - void SourceNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver[defaultOutputPortName] << _output; - archiver["sourceFunctionName"] << GetCallbackName(); - archiver["shape"] << GetShape().ToVector(); - } - - template - void SourceNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - archiver[defaultOutputPortName] >> _output; - - std::string sourceFunctionName; - archiver["sourceFunctionName"] >> sourceFunctionName; - SetCallbackName(sourceFunctionName); - - std::vector shapeVector; - archiver["shape"] >> shapeVector; - SetShape({ shapeVector }); - } - - template - void SourceNode::Interpolate(TimeTickType /*originalTime*/, TimeTickType /*newTime*/) const - { - // Default to pass-through (derived classes will override). 
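// NOTE (purely hypothetical example; no such subclass exists in this patch):
// a derived source node could override this hook to age the buffered sample
// when the model clock has run past the sample time, e.g.
//
//     template <typename ValueType>
//     void DecayingSourceNode<ValueType>::Interpolate(TimeTickType originalTime, TimeTickType newTime) const
//     {
//         const double staleness = static_cast<double>(newTime - originalTime);
//         for (auto& value : _bufferedSample) // mutable in the base class
//         {
//             value = static_cast<ValueType>(value / (1.0 + staleness));
//         }
//     }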
- } - - template - void SourceNode::SetOutputValuesLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, emitters::LLVMValue sample) - { - emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(output); - - auto numValues = output.Size(); - function.For(numValues, [sample, pOutput](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { - auto value = function.ValueAt(sample, i); - function.SetValueAt(pOutput, i, value); - }); - } - - template - void SourceNode::SetOutputValuesExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function, emitters::LLVMValue sample) - { - emitters::LLVMValue pOutput = compiler.EnsurePortEmitted(output); - - auto numValues = output.Size(); - for (size_t i = 0; i < numValues; ++i) - { - auto value = function.ValueAt(sample, i); - function.SetValueAt(pOutput, function.Literal(static_cast(i)), value); - } - } -} // namespace nodes -} // namespace ell \ No newline at end of file diff --git a/libraries/nodes/tcc/SquaredEuclideanDistanceNode.tcc b/libraries/nodes/tcc/SquaredEuclideanDistanceNode.tcc deleted file mode 100644 index 3a86eb39a..000000000 --- a/libraries/nodes/tcc/SquaredEuclideanDistanceNode.tcc +++ /dev/null @@ -1,135 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SquaredEuclideanDistanceNode.tcc (nodes) -// Authors: Suresh Iyengar, Kern Handa -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - SquaredEuclideanDistanceNode::SquaredEuclideanDistanceNode() : - Node({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 1), - _vectorsAsMatrix(0, 0) - { - } - - template - SquaredEuclideanDistanceNode::SquaredEuclideanDistanceNode(const model::OutputPort& input, const math::Matrix& vectorsAsMatrix) : - Node({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, vectorsAsMatrix.NumRows()), - _vectorsAsMatrix(vectorsAsMatrix) - { - if (input.Size() != vectorsAsMatrix.NumColumns()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "SquaredEuclideanDistanceNode: input size must match the number of columns in the vectorsAsMatrix"); - } - } - - template - void SquaredEuclideanDistanceNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - - math::MatrixArchiver::Write(_vectorsAsMatrix, "vectorsAsMatrix", archiver); - archiver[defaultInputPortName] << _input; - archiver[defaultOutputPortName] << _output; - } - - template - void SquaredEuclideanDistanceNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - - math::MatrixArchiver::Read(_vectorsAsMatrix, "vectorsAsMatrix", archiver); - archiver[defaultInputPortName] >> _input; - archiver[defaultOutputPortName] >> _output; - } - - template - void SquaredEuclideanDistanceNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, _vectorsAsMatrix); - transformer.MapNodeOutput(output, newNode->output); - } - - // We compute the distance (P - V)^2 as P^2 - 2 * P * V + V^2 where P is the input point and V is the set of vectors - template - bool 
SquaredEuclideanDistanceNode::Refine(model::ModelTransformer& transformer) const - { - const auto& inputPortElements = transformer.GetCorrespondingInputs(_input); - - // P^2 => scalar value - auto inputNorm2SquaredNode = transformer.AddNode>(inputPortElements); - - // -2 * P * V => row-wise vector - auto vectorsAsMatrix = _vectorsAsMatrix; - vectorsAsMatrix.Transform([](double d) { return -2.0 * d; }); - auto productNode = transformer.AddNode>(inputPortElements, vectorsAsMatrix); - - // Will hold the scalar value of P^2 for each row in the matrix - model::PortElements inputNorm2SquaredNodeOutputs; - // V^2 => row-wise vector of Norm-2 squared values of each vector in _vectorsAsMatrix - model::PortElements vectorNorm2SquaredConstantNodeOutputs; - for (size_t index = 0; index < _vectorsAsMatrix.NumRows(); ++index) - { - inputNorm2SquaredNodeOutputs.Append(inputNorm2SquaredNode->output); - - auto matrixRow = _vectorsAsMatrix.GetRow(index); - auto rowNorm2SquaredConstantNode = transformer.AddNode>(matrixRow.Norm2Squared()); - vectorNorm2SquaredConstantNodeOutputs.Append(rowNorm2SquaredConstantNode->output); - } - - // Add the three node outputs: - // * inputNorm2SquaredNodeOutputs (A) - // * vectorNorm2SquaredConstantNodeOutputs (B) - // * productNode->output (C) - // and map it to output node - auto& A = inputNorm2SquaredNodeOutputs; - auto& B = vectorNorm2SquaredConstantNodeOutputs; - auto& C = productNode->output; - auto aPlusB = transformer.AddNode>(A, B, emitters::BinaryOperationType::add); - auto aPlusBPlusC = transformer.AddNode>(aPlusB->output, C, emitters::BinaryOperationType::add); - transformer.MapNodeOutput(output, aPlusBPlusC->output); - - return true; - } - - template - void SquaredEuclideanDistanceNode::Compute() const - { - math::ColumnVector input(_input.Size()); - for (size_t index = 0; index < _input.Size(); ++index) - { - input[index] = _input[index]; - } - - math::ColumnVector result(_vectorsAsMatrix.NumRows()); - - auto norm1sq = input.Norm2Squared(); - - // result = -2 * _v * input - math::MultiplyScaleAddUpdate(-2.0, _vectorsAsMatrix, input, 0.0, result); - - for (size_t r = 0; r < _vectorsAsMatrix.NumRows(); r++) - { - result[r] += norm1sq + _vectorsAsMatrix.GetRow(r).Norm2Squared(); - } - - _output.SetOutput(result.ToArray()); - } - - template - SquaredEuclideanDistanceNode* AddNodeToModelTransformer(const model::PortElements& input, math::ConstMatrixReference vectorsAsMatrix, model::ModelTransformer& transformer) - { - return transformer.AddNode(input, vectorsAsMatrix); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/SumNode.tcc b/libraries/nodes/tcc/SumNode.tcc deleted file mode 100644 index 7a0142fa3..000000000 --- a/libraries/nodes/tcc/SumNode.tcc +++ /dev/null @@ -1,188 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SumNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace nodes -{ - template - SumNode::SumNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 1) - { - } - - template - SumNode::SumNode(const model::OutputPort& input) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, 1) - { - } - - template - void 
SumNode::Compute() const - { - ValueType result = 0; - for (size_t index = 0; index < _input.Size(); ++index) - { - auto v = _input[index]; - result += v; - } - _output.SetOutput({ result }); - }; - - template - void SumNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void SumNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - if (!compiler.GetCompilerOptions().unrollLoops) - { - size_t vectorSize = compiler.GetCompilerOptions().vectorWidth; - bool vectorize = compiler.GetCompilerOptions().allowVectorInstructions && (input.Size() > vectorSize); - if (vectorize) - { - CompileVectorizedLoop(compiler, function); - } - else - { - CompileLoop(compiler, function); - } - } - else - { - CompileExpanded(compiler, function); - } - } - - template - void SumNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - auto input = function.LocalArray(compiler.EnsurePortEmitted(_input)); - auto output = function.LocalArray(compiler.EnsurePortEmitted(_output)); - - function.StoreZero(output); - - const int size = _input.Size(); - constexpr int blockSize = 4; - bool unrollLoop = size > 4 * blockSize; // silly heuristic - if (unrollLoop) - { - const int numBlocks = size / blockSize; - function.For(numBlocks, [input, output, blockSize](emitters::IRFunctionEmitter& function, auto i) { - auto blockStart = blockSize * i; - for (int innerIndex = 0; innerIndex < blockSize; ++innerIndex) - { - emitters::IRLocalScalar value = input[blockStart + innerIndex]; - function.OperationAndUpdate(output, emitters::GetAddForValueType(), value); - } - }); - - // epilogue - const int epilogueSize = size - (blockSize * numBlocks); - if (epilogueSize > 0) - { - function.For(epilogueSize, [input, output](emitters::IRFunctionEmitter& function, auto i) { - emitters::IRLocalScalar value = input[i]; - function.OperationAndUpdate(output, emitters::GetAddForValueType(), value); - }); - } - } - else - { - function.For(size, [input, output](emitters::IRFunctionEmitter& function, auto i) { - emitters::IRLocalScalar value = input[i]; - function.OperationAndUpdate(output, emitters::GetAddForValueType(), value); - }); - } - } - - template - void SumNode::CompileVectorizedLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - const int size = _input.Size(); - const int vectorSize = compiler.GetCompilerOptions().vectorWidth; - assert(size >= vectorSize); - - emitters::LLVMValue input = compiler.EnsurePortEmitted(_input); - emitters::LLVMValue output = compiler.EnsurePortEmitted(_output); - - // Get LLVM types - auto& emitter = function.GetEmitter(); - auto elementType = emitter.Type(emitters::GetVariableType()); - DEBUG_USED(elementType); - assert(llvm::VectorType::isValidElementType(elementType) && "Invalid element type for LLVM vector"); - auto vectorType = emitter.VectorType(emitters::GetVariableType(), vectorSize); - auto vectorPointerType = vectorType->getPointerTo(); - - // cast input to pointer-to-vector - auto inputVector = function.CastPointer(input, vectorPointerType); - - emitters::LLVMValue vectorAccumVar = function.Variable(vectorType, "vecAccum"); - function.Store(vectorAccumVar, emitters::FillVector(function, vectorType, 0)); - - const int numBlocks = size / vectorSize; - function.For(numBlocks, [inputVector, 
vectorAccumVar](emitters::IRFunctionEmitter& function, auto blockIndex) { - auto value = function.ValueAt(inputVector, blockIndex); - function.OperationAndUpdate(vectorAccumVar, emitters::GetAddForValueType(), value); - }); - - // Accumulate horizontal sum into output - auto sum = emitters::HorizontalVectorSum(function, function.Load(vectorAccumVar)); - - // epilogue - const int epilogueSize = size - (vectorSize * numBlocks); - if (epilogueSize > 0) - { - for (int epilogueIndex = vectorSize * numBlocks; epilogueIndex < size; ++epilogueIndex) - { - emitters::LLVMValue pValue = function.ValueAt(input, function.Literal(epilogueIndex)); - sum = function.Operator(emitters::GetAddForValueType(), sum, pValue); - } - } - function.Store(output, sum); - } - - template - void SumNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - - function.StoreZero(pResult); - for (size_t i = 0; i < input.Size(); ++i) - { - auto pValue = compiler.LoadPortElementVariable(input.GetInputElement(i)); - function.OperationAndUpdate(pResult, emitters::GetAddForValueType(), pValue); - } - } - - template - void SumNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - } - - template - void SumNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/TypeCastNode.tcc b/libraries/nodes/tcc/TypeCastNode.tcc deleted file mode 100644 index 7583170ed..000000000 --- a/libraries/nodes/tcc/TypeCastNode.tcc +++ /dev/null @@ -1,112 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: TypeCastNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - TypeCastNode::TypeCastNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0){}; - - template - TypeCastNode::TypeCastNode(const model::OutputPort& input) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, input.Size()){}; - - template - void TypeCastNode::Compute() const - { - auto size = _output.Size(); - std::vector outputValues(size); - for (size_t index = 0; index < size; ++index) - { - outputValues[index] = static_cast(_input[index]); - } - _output.SetOutput(outputValues); - } - - template - void TypeCastNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements); - transformer.MapNodeOutput(output, newNode->output); - } - - template - void TypeCastNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - // The IR compiler currently implements bools using integers. We'll just use the already created variable. 
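// NOTE (hedged usage sketch; the surrounding model and input node are assumed,
// not taken from this patch): a cast node is added to a model like any other node:
//
//     model::Model model;
//     auto inputNode = model.AddNode<model::InputNode<double>>(10);
//     auto castNode = model.AddNode<nodes::TypeCastNode<double, int>>(inputNode->output);
//     // castNode->output carries the same ten values, truncated toward zero to int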
- auto inputType = emitters::GetVariableType(); - auto outputType = emitters::GetVariableType(); - - // no-op case - if (inputType == outputType) - { - emitters::Variable* elementVar = compiler.GetVariableForPort(input.GetReferencedPort()); - compiler.SetVariableForPort(output, elementVar); // The types are the same, so this is a no-op. Just set the output variable to be the same as the input variable - return; - } - - if (!compiler.GetCompilerOptions().unrollLoops) - { - CompileLoop(compiler, function); - } - else - { - CompileExpanded(compiler, function); - } - } - - template - void TypeCastNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - auto count = input.Size(); - emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - - function.For(count, [pInput, pResult](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { - emitters::LLVMValue inputValue = function.ValueAt(pInput, i); - emitters::LLVMValue castElement = function.CastValue(inputValue); - function.SetValueAt(pResult, i, castElement); - }); - } - - template - void TypeCastNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - - for (size_t i = 0; i < input.Size(); ++i) - { - emitters::LLVMValue inputValue = compiler.LoadPortElementVariable(input.GetInputElement(i)); - emitters::LLVMValue castElement = function.CastValue(inputValue); - function.SetValueAt(pResult, function.Literal((int)i), castElement); - } - } - - template - void TypeCastNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - } - - template - void TypeCastNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - _output.SetSize(_input.Size()); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/UnaryOperationNode.tcc b/libraries/nodes/tcc/UnaryOperationNode.tcc deleted file mode 100644 index 3e5418b39..000000000 --- a/libraries/nodes/tcc/UnaryOperationNode.tcc +++ /dev/null @@ -1,292 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: UnaryOperationNode.tcc (nodes) -// Authors: Chuck Jacobs, Kern Handa -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#define ADD_TO_STRING_ENTRY(NAMESPACE, OPERATOR) \ - case NAMESPACE::OPERATOR: \ - return #OPERATOR; -#define BEGIN_FROM_STRING if (false) -#define ADD_FROM_STRING_ENTRY(NAMESPACE, OPERATOR) else if (name == #OPERATOR) return NAMESPACE::OPERATOR - -namespace ell -{ -namespace nodes -{ - namespace UnaryOperations - { - inline std::string to_string(emitters::UnaryOperationType op) - { - switch (op) - { - ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, none); - ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, sqrt); - ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, logicalNot); - ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, tanh); - ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, exp); - ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, square); - ADD_TO_STRING_ENTRY(emitters::UnaryOperationType, log); - - default: - throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, 
"Unknown unary operation"); - } - } - - inline emitters::UnaryOperationType from_string(std::string name) - { - BEGIN_FROM_STRING; - ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, none); - ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, sqrt); - ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, logicalNot); - ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, tanh); - ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, exp); - ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, square); - ADD_FROM_STRING_ENTRY(emitters::UnaryOperationType, log); - - throw utilities::InputException(utilities::InputExceptionErrors::indexOutOfRange, "Unknown unary operation"); - } - - template - ValueType Sqrt(ValueType a) - { - return std::sqrt(a); - } - - template <> - inline bool Sqrt(bool x) - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking sqrt of a boolean value"); - } - - template - ValueType LogicalNot(ValueType a) - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking not of a non-boolean value"); - } - - template <> - inline bool LogicalNot(bool x) - { - return !x; - } - - template - ValueType Tanh(ValueType a) - { - return std::tanh(a); - } - - template <> - inline bool Tanh(bool x) - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking tanh of a boolean value"); - } - - template - ValueType Exp(ValueType a) - { - return std::exp(a); - } - - template <> - inline bool Exp(bool x) - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking exp of a boolean value"); - } - - template - ValueType Square(ValueType a) - { - return a * a; - } - - template <> - inline bool Square(bool) - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking square of a boolean value"); - } - - template - ValueType Log(ValueType a) - { - return std::log(a); - } - - template <> - inline bool Log(bool) - { - throw utilities::InputException(utilities::InputExceptionErrors::typeMismatch, "Error: taking log of a boolean value"); - } - } // namespace UnaryOperations - - template - UnaryOperationNode::UnaryOperationNode() : - CompilableNode({ &_input }, { &_output }), - _input(this, {}, defaultInputPortName), - _output(this, defaultOutputPortName, 0), - _operation(emitters::UnaryOperationType::none) - { - } - - template - UnaryOperationNode::UnaryOperationNode(const model::OutputPort& input, emitters::UnaryOperationType operation) : - CompilableNode({ &_input }, { &_output }), - _input(this, input, defaultInputPortName), - _output(this, defaultOutputPortName, _input.Size()), - _operation(operation) - { - } - - template - template - std::vector UnaryOperationNode::ComputeOutput(Operation&& function) const - { - auto output = std::vector(_input.Size()); - for (size_t index = 0; index < _input.Size(); index++) - { - output[index] = function(_input[index]); - } - return output; - } - - template - void UnaryOperationNode::Compute() const - { - std::vector output; - switch (_operation) - { - case emitters::UnaryOperationType::sqrt: - output = ComputeOutput(UnaryOperations::Sqrt); - break; - case emitters::UnaryOperationType::logicalNot: - output = ComputeOutput(UnaryOperations::LogicalNot); - break; - case emitters::UnaryOperationType::exp: - output = ComputeOutput(UnaryOperations::Exp); - break; - case emitters::UnaryOperationType::tanh: - output = ComputeOutput(UnaryOperations::Tanh); - break; - case 
emitters::UnaryOperationType::square: - output = ComputeOutput(UnaryOperations::Square); - break; - case emitters::UnaryOperationType::log: - output = ComputeOutput(UnaryOperations::Log); - break; - default: - throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Unknown operation type"); - } - _output.SetOutput(output); - }; - - template - void UnaryOperationNode::Copy(model::ModelTransformer& transformer) const - { - const auto& newPortElements = transformer.GetCorrespondingInputs(_input); - auto newNode = transformer.AddNode>(newPortElements, _operation); - transformer.MapNodeOutput(output, newNode->output); - } - - template - emitters::LLVMFunction UnaryOperationNode::GetOperator(emitters::IRFunctionEmitter& function) const - { - switch (this->GetOperation()) - { - case emitters::UnaryOperationType::sqrt: - return function.GetModule().GetRuntime().GetSqrtFunction(); - case emitters::UnaryOperationType::exp: - return function.GetModule().GetRuntime().GetExpFunction(); - case emitters::UnaryOperationType::log: - return function.GetModule().GetRuntime().GetLogFunction(); - case emitters::UnaryOperationType::logicalNot: - { - auto& module = function.GetModule(); - auto& f = module.BeginFunction("logicalNot", emitters::GetVariableType(), { { "value", emitters::GetVariableType() } }); - auto args = f.Arguments().begin(); - llvm::Argument& val = *args; - f.Return(f.LogicalNot(&val)); - module.EndFunction(); - return f.GetFunction(); - } - case emitters::UnaryOperationType::square: - { - auto& module = function.GetModule(); - auto& f = module.BeginFunction("square", emitters::GetVariableType(), { { "value", emitters::GetVariableType() } }); - auto args = f.Arguments().begin(); - llvm::Argument& val = *args; - f.Return(f.Operator(emitters::GetMultiplyForValueType(), &val, &val)); - module.EndFunction(); - return f.GetFunction(); - } - case emitters::UnaryOperationType::tanh: - return function.GetModule().GetRuntime().GetTanhFunction(); - case emitters::UnaryOperationType::none: - default: - throw emitters::EmitterException(emitters::EmitterError::unaryOperationNotSupported); - } - } - - template - void UnaryOperationNode::Compile(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - if (!compiler.GetCompilerOptions().unrollLoops) - { - CompileLoop(compiler, function); - } - else - { - CompileExpanded(compiler, function); - } - } - - template - void UnaryOperationNode::CompileLoop(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - // Loop version broken - auto count = input.Size(); - emitters::LLVMValue pInput = compiler.EnsurePortEmitted(input); - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - - function.For(count, [pInput, pResult, this](emitters::IRFunctionEmitter& function, emitters::LLVMValue i) { - emitters::LLVMValue inputValue = function.ValueAt(pInput, i); - emitters::LLVMValue pOpResult = function.Call(GetOperator(function), { inputValue }); - function.SetValueAt(pResult, i, pOpResult); - }); - } - - template - void UnaryOperationNode::CompileExpanded(model::IRMapCompiler& compiler, emitters::IRFunctionEmitter& function) - { - emitters::LLVMValue pResult = compiler.EnsurePortEmitted(output); - - for (size_t i = 0; i < input.Size(); ++i) - { - emitters::LLVMValue inputValue = compiler.LoadPortElementVariable(input.GetInputElement(i)); - emitters::LLVMValue pOpResult = function.Call(GetOperator(function), { inputValue }); - function.SetValueAt(pResult, function.Literal((int)i), pOpResult); - 
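// NOTE: this "expanded" path emits one call and store per element at compile
// time (full unrolling), while CompileLoop above emits a single runtime loop;
// Compile() picks between them based on the unrollLoops compiler option.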
} - } - - template - void UnaryOperationNode::WriteToArchive(utilities::Archiver& archiver) const - { - Node::WriteToArchive(archiver); - archiver[defaultInputPortName] << _input; - archiver["operation"] << UnaryOperations::to_string(_operation); - } - - template - void UnaryOperationNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - Node::ReadFromArchive(archiver); - archiver[defaultInputPortName] >> _input; - std::string operation; - archiver["operation"] >> operation; - _operation = UnaryOperations::from_string(operation); - _output.SetSize(_input.Size()); - } -} // namespace nodes -} // namespace ell diff --git a/libraries/nodes/tcc/ValueSelectorNode.tcc b/libraries/nodes/tcc/ValueSelectorNode.tcc deleted file mode 100644 index 7015a21a6..000000000 --- a/libraries/nodes/tcc/ValueSelectorNode.tcc +++ /dev/null @@ -1,80 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ValueSelectorNode.tcc (nodes) -// Authors: Chuck Jacobs -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace nodes -{ - template - ValueSelectorNode::ValueSelectorNode() : - Node({ &_condition, &_input1, &_input2 }, { &_output }), - _condition(this, {}, conditionPortName), - _input1(this, {}, defaultInput1PortName), - _input2(this, {}, defaultInput2PortName), - _output(this, defaultOutputPortName, 0) - { - } - - template - ValueSelectorNode::ValueSelectorNode(const model::OutputPort& condition, const model::OutputPort& input1, const model::OutputPort& input2) : - Node({ &_condition, &_input1, &_input2 }, { &_output }), - _condition(this, condition, conditionPortName), - _input1(this, input1, defaultInput1PortName), - _input2(this, input2, defaultInput2PortName), - _output(this, defaultOutputPortName, input1.Size()) - { - if (condition.Size() != 1) - { - throw ell::utilities::Exception("Error: Condition must be 1-D signal"); - } - - if (input1.Size() != input2.Size()) - { - throw ell::utilities::Exception("Error: input values must be same dimension"); - } - }; - - template - void ValueSelectorNode::Compute() const - { - bool cond = _condition[0]; - _output.SetOutput(cond ? 
_input1.GetValue() : _input2.GetValue());
-    };
-
-    template <typename ValueType>
-    void ValueSelectorNode<ValueType>::WriteToArchive(utilities::Archiver& archiver) const
-    {
-        Node::WriteToArchive(archiver);
-        archiver[defaultInput1PortName] << _input1;
-        archiver[defaultInput2PortName] << _input2;
-        archiver[conditionPortName] << _condition;
-    }
-
-    template <typename ValueType>
-    void ValueSelectorNode<ValueType>::ReadFromArchive(utilities::Unarchiver& archiver)
-    {
-        Node::ReadFromArchive(archiver);
-        archiver[defaultInput1PortName] >> _input1;
-        archiver[defaultInput2PortName] >> _input2;
-        archiver[conditionPortName] >> _condition;
-        _output.SetSize(_input1.Size());
-    }
-
-    template <typename ValueType>
-    void ValueSelectorNode<ValueType>::Copy(model::ModelTransformer& transformer) const
-    {
-        const auto& newCondition = transformer.GetCorrespondingInputs(_condition);
-        const auto& newPortElements1 = transformer.GetCorrespondingInputs(_input1);
-        const auto& newPortElements2 = transformer.GetCorrespondingInputs(_input2);
-
-        auto newNode = transformer.AddNode<ValueSelectorNode<ValueType>>(newCondition, newPortElements1, newPortElements2);
-
-        transformer.MapNodeOutput(output, newNode->output);
-    }
-} // namespace nodes
-} // namespace ell
diff --git a/libraries/nodes/tcc/VoiceActivityDetectorNode.tcc b/libraries/nodes/tcc/VoiceActivityDetectorNode.tcc
deleted file mode 100644
index 6ccc55e02..000000000
--- a/libraries/nodes/tcc/VoiceActivityDetectorNode.tcc
+++ /dev/null
@@ -1,41 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     VoiceActivityDetectorNode.tcc (nodes)
-//  Authors:  Chris Lovett
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-
-namespace ell
-{
-namespace nodes
-{
-    template <typename ValueType>
-    VoiceActivityDetectorNode<ValueType>::VoiceActivityDetectorNode() :
-        CompilableNode({ &_input }, { &_output }),
-        _input(this, {}, defaultInputPortName),
-        _output(this, defaultOutputPortName, 1)
-    {
-    }
-
-    template <typename ValueType>
-    VoiceActivityDetectorNode<ValueType>::VoiceActivityDetectorNode(const model::OutputPort<ValueType>& input,
-                                                                    double sampleRate,
-                                                                    double frameDuration,
-                                                                    double tauUp,
-                                                                    double tauDown,
-                                                                    double largeInput,
-                                                                    double gainAtt,
-                                                                    double thresholdUp,
-                                                                    double thresholdDown,
-                                                                    double levelThreshold) :
-        CompilableNode({ &_input }, { &_output }),
-        _input(this, input, defaultInputPortName),
-        _output(this, defaultOutputPortName, 1),
-        _vad(sampleRate, input.Size(), frameDuration, tauUp, tauDown, largeInput, gainAtt, thresholdUp, thresholdDown, levelThreshold)
-    {
-    }
-} // namespace nodes
-} // namespace ell
diff --git a/libraries/nodes/test/src/DSPNodesTests.cpp b/libraries/nodes/test/src/DSPNodesTests.cpp
index d022de135..a136a4212 100644
--- a/libraries/nodes/test/src/DSPNodesTests.cpp
+++ b/libraries/nodes/test/src/DSPNodesTests.cpp
@@ -45,9 +45,9 @@
 #include
+#include
 #include
 #include
-#include
 #include
diff --git a/libraries/passes/CMakeLists.txt b/libraries/passes/CMakeLists.txt
index f674a6f2e..f0db0a711 100644
--- a/libraries/passes/CMakeLists.txt
+++ b/libraries/passes/CMakeLists.txt
@@ -18,18 +18,14 @@ set(include
   include/StandardPasses.h
 )
 
-set(tcc
-)
-
 set(doc
 )
 
 source_group("src" FILES ${src})
 source_group("include" FILES ${include})
-source_group("tcc" FILES ${tcc})
 source_group("doc" FILES ${doc})
 
-add_library(${library_name} ${src} ${include} ${tcc} ${doc})
+add_library(${library_name} ${src} ${include} ${doc})
 target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR})
 target_link_libraries(${library_name} model nodes)
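# NOTE (illustrative, not part of the patch): after this change each library's
# CMakeLists reduces to the same tcc-free pattern, e.g.
#
#     source_group("src" FILES ${src})
#     source_group("include" FILES ${include})
#     add_library(${library_name} ${src} ${include} ${doc})
#     target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR})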
@@ -50,9 +46,6 @@ set(test_include test/include/ModelOptimizerTest.h ) -set(test_tcc -) - source_group("src" FILES ${test_src}) source_group("include" FILES ${test_include}) diff --git a/libraries/predictors/CMakeLists.txt b/libraries/predictors/CMakeLists.txt index a55050348..a9293ab2c 100644 --- a/libraries/predictors/CMakeLists.txt +++ b/libraries/predictors/CMakeLists.txt @@ -22,14 +22,6 @@ set(include include/SingleElementThresholdPredictor.h ) -set(tcc - tcc/ForestPredictor.tcc - tcc/LinearPredictor.tcc - tcc/NeuralNetworkPredictor.tcc - tcc/Normalizer.tcc - tcc/SignPredictor.tcc -) - set(neural_include neural/include/Activation.h neural/include/ActivationLayer.h @@ -57,38 +49,12 @@ set(neural_include set(neural_src neural/src/Activation.cpp) -set(neural_tcc - neural/tcc/Activation.tcc - neural/tcc/ActivationLayer.tcc - neural/tcc/BatchNormalizationLayer.tcc - neural/tcc/BiasLayer.tcc - neural/tcc/BinaryConvolutionalLayer.tcc - neural/tcc/ConvolutionalLayer.tcc - neural/tcc/FullyConnectedLayer.tcc - neural/tcc/HardSigmoidActivation.tcc - neural/tcc/InputLayer.tcc - neural/tcc/Layer.tcc - neural/tcc/LeakyReLUActivation.tcc - neural/tcc/MaxPoolingFunction.tcc - neural/tcc/MeanPoolingFunction.tcc - neural/tcc/ParametricReLUActivation.tcc - neural/tcc/PoolingLayer.tcc - neural/tcc/ReLUActivation.tcc - neural/tcc/RegionDetectionLayer.tcc - neural/tcc/ScalingLayer.tcc - neural/tcc/SigmoidActivation.tcc - neural/tcc/SoftmaxLayer.tcc - neural/tcc/TanhActivation.tcc -) - source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) source_group("neural\\include" FILES ${neural_include}) source_group("neural\\src" FILES ${neural_src}) -source_group("neural\\tcc" FILES ${neural_tcc}) -add_library(${library_name} ${src} ${include} ${tcc} ${neural_src} ${neural_include} ${neural_tcc}) +add_library(${library_name} ${src} ${include} ${neural_src} ${neural_include}) target_include_directories(${library_name} PRIVATE include neural/include ${ELL_LIBRARIES_DIR}) target_link_libraries(${library_name} data dsp math utilities) @@ -113,16 +79,10 @@ set(test_include test/include/ProtoNNPredictorTests.h ) -set(test_tcc - test/tcc/LinearPredictorTests.tcc - test/tcc/NeuralNetworkPredictorTests.tcc -) - source_group("src" FILES ${test_src}) source_group("include" FILES ${test_include}) -source_group("tcc" FILES ${test_tcc}) -add_executable(${test_name} ${test_src} ${test_include} ${test_tcc}) +add_executable(${test_name} ${test_src} ${test_include}) target_include_directories(${test_name} PRIVATE test/include ${ELL_LIBRARIES_DIR}) target_link_libraries(${test_name} common data predictors testing ) diff --git a/libraries/predictors/include/ForestPredictor.h b/libraries/predictors/include/ForestPredictor.h index 80ee4934e..4e7b1a5e7 100644 --- a/libraries/predictors/include/ForestPredictor.h +++ b/libraries/predictors/include/ForestPredictor.h @@ -356,4 +356,412 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/ForestPredictor.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace predictors +{ + template + ForestPredictor::SplittableNodeId::SplittableNodeId(size_t parentNodeIndex, size_t childPosition) : + _isRoot(false), + _parentNodeIndex(parentNodeIndex), + _childPosition(childPosition) + { + } + + template + ForestPredictor::SplitAction::SplitAction(SplittableNodeId nodeId, SplitRuleType _splitRule, std::vector edgePredictors) : + _nodeId(std::move(nodeId)), + _splitRule(std::move(_splitRule)), + 
_edgePredictors(std::move(edgePredictors)) + { + } + + template + ForestPredictor::Edge::Edge(const EdgePredictorType& predictor) : + _predictor(predictor), + _targetNodeIndex(0) + { + } + + template + void ForestPredictor::Edge::SetTargetNodeIndex(size_t targetNodeIndex) + { + _targetNodeIndex = targetNodeIndex; + } + + template + bool ForestPredictor::IsTrivial() const + { + if (_rootIndices.size() == 0 && _bias == 0.0) + { + return true; + } + else + { + return false; + } + } + + template + size_t ForestPredictor::NumInteriorNodes(size_t interiorNodeIndex) const + { + if (interiorNodeIndex >= _interiorNodes.size()) + { + return 0; + } + + auto const& interiorNode = _interiorNodes[interiorNodeIndex]; + size_t numInteriorNodes = 1; + + for (const auto& edge : interiorNode._outgoingEdges) + { + if (edge.IsTargetInterior()) + { + numInteriorNodes += NumInteriorNodes(edge.GetTargetNodeIndex()); + } + } + + return numInteriorNodes; + } + + template + size_t ForestPredictor::NumEdges(size_t interiorNodeIndex) const + { + if (interiorNodeIndex >= _interiorNodes.size()) + { + return 0; + } + + auto const& interiorNode = _interiorNodes[interiorNodeIndex]; + size_t numEdges = interiorNode._outgoingEdges.size(); + + for (const auto& edge : interiorNode._outgoingEdges) + { + if (edge.IsTargetInterior()) + { + numEdges += NumEdges(edge.GetTargetNodeIndex()); + } + } + + return numEdges; + } + + template + double ForestPredictor::Predict(const DataVectorType& input) const + { + double output = _bias; + for (auto treeRootIndex : _rootIndices) + { + output += Predict(input, treeRootIndex); + } + return output; + } + + template + double ForestPredictor::Predict(const DataVectorType& input, size_t interiorNodeIndex) const + { + if (interiorNodeIndex >= _interiorNodes.size()) + { + return 0.0; + } + + double output = 0.0; + + VisitEdgePathToLeaf(input, interiorNodeIndex, [&](const InteriorNode& interiorNode, size_t edgePosition) { output += interiorNode._outgoingEdges[edgePosition]._predictor.Predict(input); }); + + return output; + } + + template + std::vector ForestPredictor::GetEdgeIndicatorVector(const DataVectorType& input) const + { + std::vector edgeIndicator(_numEdges); + for (auto treeRootIndex : _rootIndices) + { + SetEdgeIndicatorVector(input, edgeIndicator, treeRootIndex); + } + return edgeIndicator; + } + + template + std::vector ForestPredictor::GetEdgeIndicatorVector(const DataVectorType& input, size_t interiorNodeIndex) const + { + std::vector edgeIndicator(_numEdges); + SetEdgeIndicatorVector(input, edgeIndicator, interiorNodeIndex); + return edgeIndicator; + } + + template + size_t ForestPredictor::NumChildren(size_t interiorNodeIndex) const + { + if (interiorNodeIndex >= _interiorNodes.size()) + { + return 0; + } + return _interiorNodes[interiorNodeIndex]._outgoingEdges.size(); + } + + template + typename ForestPredictor::SplittableNodeId ForestPredictor::GetChildId(size_t parentNodeIndex, size_t childPosition) const + { + // check that the parent exists + if (parentNodeIndex >= _interiorNodes.size()) + { + throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "invalid identifier requested - parent does not exist"); + } + + // check that the splittable node exists + if (childPosition >= _interiorNodes[parentNodeIndex]._outgoingEdges.size()) + { + throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "invalid identifier requested - child does not exist"); + } + + return SplittableNodeId(parentNodeIndex, childPosition); + } + + template + 
size_t ForestPredictor::Split(const SplitAction& splitAction) + { + if (splitAction._nodeId._isRoot) + { + // add interior Node + size_t interiorNodeIndex = AddInteriorNode(splitAction); + + // add new tree + _rootIndices.push_back(interiorNodeIndex); + + // return ID of new root + return interiorNodeIndex; + } + else + { + // check that this node wasn't previously split + auto& incomingEdge = _interiorNodes[splitAction._nodeId._parentNodeIndex]._outgoingEdges[splitAction._nodeId._childPosition]; + if (incomingEdge.IsTargetInterior()) + { + throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "invalid split in decision tree - node previously split"); + } + + // add interior Node + size_t interiorNodeIndex = AddInteriorNode(splitAction); + + // update the parent about the new interior node + incomingEdge.SetTargetNodeIndex(interiorNodeIndex); + + // return ID of new interior node + return interiorNodeIndex; + } + } + + template + void ForestPredictor::AddToBias(double value) + { + _bias += value; + } + + template + void ForestPredictor::WriteToArchive(utilities::Archiver& archiver) const + { + archiver["interiorNodes"] << _interiorNodes; + archiver["rootIndices"] << _rootIndices; + archiver["bias"] << _bias; + archiver["numEdges"] << _numEdges; + } + + template + void ForestPredictor::ReadFromArchive(utilities::Unarchiver& archiver) + { + archiver["interiorNodes"] >> _interiorNodes; + archiver["rootIndices"] >> _rootIndices; + archiver["bias"] >> _bias; + archiver["numEdges"] >> _numEdges; + } + + template + void ForestPredictor::SetEdgeIndicatorVector(const DataVectorType& input, std::vector& output, size_t interiorNodeIndex) const + { + if (interiorNodeIndex >= _interiorNodes.size()) + { + return; + } + VisitEdgePathToLeaf(input, interiorNodeIndex, [&output](const InteriorNode& interiorNode, size_t edgePosition) { output[interiorNode._firstEdgeIndex + edgePosition] = true; }); + } + + template + size_t ForestPredictor::AddInteriorNode(const SplitAction& splitAction) + { + size_t numEdges = splitAction._edgePredictors.size(); + + // check correctness of splitAction + if (numEdges != splitAction._splitRule.NumOutputs()) + { + throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "invalid split in decision tree - number of split rule outputs doesn't match fan-out"); + } + + // get indices + size_t interiorNodeIndex = _interiorNodes.size(); + + // create the new interior node + InteriorNode interiorNode(splitAction, _numEdges); + _interiorNodes.push_back(std::move(interiorNode)); + + // increment global edge count + _numEdges += numEdges; + + return interiorNodeIndex; + } + + template + void ForestPredictor::VisitEdgePathToLeaf(const DataVectorType& input, size_t interiorNodeIndex, std::function operation) const + { + size_t nodeIndex = interiorNodeIndex; + + do + { + const auto& interiorNode = _interiorNodes[nodeIndex]; + + // which way do we go? 
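+                // The split rule returns the index of the outgoing edge to follow;
+                // a negative result means "no decision", which ends the traversal early.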
+ int edgePosition = static_cast(interiorNode._splitRule.Predict(input)); + + // check for early eject + if (edgePosition < 0) + { + break; + } + + // apply the operation + operation(interiorNode, edgePosition); + + //follow the edge to the next node + const auto& edge = interiorNode._outgoingEdges[edgePosition]; + nodeIndex = edge.GetTargetNodeIndex(); + } while (nodeIndex != 0); + } + + // + // InteriorNode + // + template + ForestPredictor::InteriorNode::InteriorNode(const SplitAction& splitAction, size_t _firstEdgeIndex) : + _splitRule(splitAction._splitRule), + _firstEdgeIndex(_firstEdgeIndex) + { + std::copy(splitAction._edgePredictors.begin(), splitAction._edgePredictors.end(), std::back_inserter(_outgoingEdges)); + } + + template + void ForestPredictor::InteriorNode::WriteToArchive(utilities::Archiver& archiver) const + { + archiver["splitRule"] << _splitRule; + archiver["outgoingEdges"] << _outgoingEdges; + archiver["firstEdgeIndex"] << _firstEdgeIndex; + } + + template + void ForestPredictor::InteriorNode::ReadFromArchive(utilities::Unarchiver& archiver) + { + archiver["splitRule"] >> _splitRule; + archiver["outgoingEdges"] >> _outgoingEdges; + archiver["firstEdgeIndex"] >> _firstEdgeIndex; + } + + // + // debugging code + // + + template + void ForestPredictor::SplittableNodeId::Print(std::ostream& os) const + { + if (_isRoot) + { + os << "root"; + } + else + { + os << "child " << _childPosition << " of node " << _parentNodeIndex; + } + } + + template + void ForestPredictor::SplitAction::PrintLine(std::ostream& os, size_t tabs) const + { + os << std::string(tabs * 4, ' ') << "action = split "; + _nodeId.Print(os); + os << "\n"; + + os << std::string(tabs * 4, ' ') << "rule:\n"; + _splitRule.PrintLine(os, tabs + 1); + + os << std::string(tabs * 4, ' ') << "edge predictors:\n"; + for (const auto& predictor : _edgePredictors) + { + predictor.PrintLine(os, tabs + 1); + } + } + + // + // debugging members + // + + template + void ForestPredictor::PrintLine(std::ostream& os, size_t tabs) const + { + os << std::string(tabs * 4, ' ') << "Forest Predictor: bias = " << _bias << "\n"; + for (const auto& interiorNode : _interiorNodes) + { + interiorNode.PrintLine(os, tabs + 1); + } + for (auto treeRootIndex : _rootIndices) + { + os << std::string(tabs * 4, ' ') << "Tree: root index = " << treeRootIndex << "\n"; + } + } + + template + void ForestPredictor::InteriorNode::PrintLine(std::ostream& os, size_t tabs) const + { + os << std::string(tabs * 4, ' ') << "InteriorNode:\n"; + _splitRule.PrintLine(os, tabs + 1); + for (const auto& edge : _outgoingEdges) + { + edge.PrintLine(os, tabs + 1); + } + } + // + // Edge + // + template + void ForestPredictor::Edge::PrintLine(std::ostream& os, size_t tabs) const + { + os << std::string(tabs * 4, ' ') << "Edge:\n"; + _predictor.PrintLine(os, tabs + 1); + os << std::string(tabs * 4, ' ') << "Target node index = " << _targetNodeIndex << "\n"; + } + + template + bool ForestPredictor::Edge::IsTargetInterior() const + { + return _targetNodeIndex == 0 ? 
false : true;
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType>
+    void ForestPredictor<SplitRuleType, EdgePredictorType>::Edge::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        archiver["predictor"] << _predictor;
+        archiver["targetNodeIndex"] << _targetNodeIndex;
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType>
+    void ForestPredictor<SplitRuleType, EdgePredictorType>::Edge::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        archiver["predictor"] >> _predictor;
+        archiver["targetNodeIndex"] >> _targetNodeIndex;
+    }
+} // namespace predictors
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/predictors/include/LinearPredictor.h b/libraries/predictors/include/LinearPredictor.h
index 2bc676fb2..04a0fba6b 100644
--- a/libraries/predictors/include/LinearPredictor.h
+++ b/libraries/predictors/include/LinearPredictor.h
@@ -132,4 +132,95 @@ namespace predictors
 } // namespace predictors
 } // namespace ell
 
-#include "../tcc/LinearPredictor.tcc"
+#pragma region implementation
+
+#include
+
+#include
+
+#include
+
+namespace ell
+{
+namespace predictors
+{
+    template <typename ElementType>
+    LinearPredictor<ElementType>::LinearPredictor(size_t dim) :
+        _w(dim),
+        _b(0)
+    {
+    }
+
+    template <typename ElementType>
+    LinearPredictor<ElementType>::LinearPredictor(const math::ColumnVector<ElementType>& weights, ElementType bias) :
+        _w(weights),
+        _b(bias)
+    {
+    }
+
+    template <typename ElementType>
+    template <typename OtherElementType>
+    LinearPredictor<ElementType>::LinearPredictor(const LinearPredictor<OtherElementType>& other) :
+        _b(other.GetBias())
+    {
+        auto weights = other.GetWeights();
+        _w.Resize(weights.Size());
+        for (size_t i = 0; i < weights.Size(); ++i)
+        {
+            _w[i] = static_cast<ElementType>(weights[i]);
+        }
+    }
+
+    template <typename ElementType>
+    void LinearPredictor<ElementType>::Reset()
+    {
+        _w.Reset();
+        _b = 0;
+    }
+
+    template <typename ElementType>
+    void LinearPredictor<ElementType>::Resize(size_t size)
+    {
+        _w.Resize(size);
+    }
+
+    template <typename ElementType>
+    ElementType LinearPredictor<ElementType>::Predict(const DataVectorType& dataVector) const
+    {
+        return _w * dataVector + _b;
+    }
+
+    template <typename ElementType>
+    auto LinearPredictor<ElementType>::GetWeightedElements(const DataVectorType& dataVector) const -> DataVectorType
+    {
+        auto transformation = [&](data::IndexValue indexValue) -> ElementType { return indexValue.value * _w[indexValue.index]; };
+        return dataVector.TransformAs(transformation);
+    }
+
+    template <typename ElementType>
+    void LinearPredictor<ElementType>::Scale(ElementType scalar)
+    {
+        _w *= scalar;
+        _b *= scalar;
+    }
+
+    template <typename ElementType>
+    void LinearPredictor<ElementType>::WriteToArchive(utilities::Archiver& archiver) const
+    {
+        auto w = _w.ToArray();
+        archiver["w"] << w;
+        archiver["b"] << _b;
+    }
+
+    template <typename ElementType>
+    void LinearPredictor<ElementType>::ReadFromArchive(utilities::Unarchiver& archiver)
+    {
+        std::vector<ElementType> w;
+        archiver["w"] >> w;
+        _w = math::ColumnVector<ElementType>(std::move(w));
+        archiver["b"] >> _b;
+    }
+} // namespace predictors
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/predictors/include/NeuralNetworkPredictor.h b/libraries/predictors/include/NeuralNetworkPredictor.h
index 9fb88446c..244a77aa4 100644
--- a/libraries/predictors/include/NeuralNetworkPredictor.h
+++ b/libraries/predictors/include/NeuralNetworkPredictor.h
@@ -160,4 +160,209 @@ namespace predictors
 } // namespace predictors
 } // namespace ell
 
-#include "../tcc/NeuralNetworkPredictor.tcc"
+#pragma region implementation
+
+//stl
+#include
+
+namespace ell
+{
+namespace predictors
+{
+    constexpr utilities::ArchiveVersion c_currentNeuralNetworkPredictorArchiveVersion = { utilities::ArchiveVersionNumbers::v1 };
+
+    template <typename ElementType>
+    NeuralNetworkPredictor<ElementType>::NeuralNetworkPredictor(InputLayerReference&& inputLayer, Layers&& layers) :
+        _inputLayer(std::move(inputLayer)),
+        _layers(std::move(layers)),
+        _output(_layers.back()->GetOutput().Size())
+    {
+    }
+
+    template <typename ElementType>
+    void NeuralNetworkPredictor<ElementType>::RemoveLastLayers(size_t
numberToRemove) + { + if (_layers.size() > numberToRemove) + { + _layers.resize(_layers.size() - numberToRemove); + _output.resize(_layers.back()->GetOutput().Size()); + } + else + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "RemoveLastLayers numberToRemove exceeds number of layers."); + } + } + + template + typename NeuralNetworkPredictor::Shape NeuralNetworkPredictor::GetInputShape() const + { + if (_inputLayer != nullptr) + { + return _inputLayer->GetInputShape(); + } + return { 0, 0, 0 }; + } + + template + typename NeuralNetworkPredictor::Shape NeuralNetworkPredictor::GetOutputShape() const + { + if (_layers.size() > 0) + { + return _layers.back()->GetOutputShape(); + } + return { 0, 0, 0 }; + } + + template + const std::vector& NeuralNetworkPredictor::Predict(const DataVectorType& dataVector) const + { + if (_inputLayer != nullptr) + { + _inputLayer->SetInput(dataVector); + _inputLayer->Compute(); + } + Compute(); + return _output; + } + + template + const std::vector& NeuralNetworkPredictor::Predict(const std::vector& input) const + { + if (_inputLayer != nullptr) + { + _inputLayer->SetInput(input); + _inputLayer->Compute(); + } + Compute(); + return _output; + } + + template + void NeuralNetworkPredictor::Compute() const + { + // Forward feed inputs through the layers + for (size_t i = 0; i < _layers.size(); i++) + { + _layers[i]->Compute(); + // Uncomment the following line to print layer info + //_layers[i]->Print(std::cout); + } + + if (_layers.size() > 0) + { + auto output = _layers.back()->GetOutput(); + size_t vectorIndex = 0; + + //_output.resize(output.NumElements()); + for (size_t i = 0; i < output.NumRows(); i++) + { + for (size_t j = 0; j < output.NumColumns(); j++) + { + for (size_t k = 0; k < output.NumChannels(); k++) + { + _output[vectorIndex++] = output(i, j, k); + } + } + } + } + else + { + _output.assign(_output.size(), 0); + } + } + + template + void NeuralNetworkPredictor::Reset() + { + for (size_t i = 0; i < _layers.size(); i++) + { + _layers[i]->Reset(); + } + } + + template + void NeuralNetworkPredictor::WriteToArchive(utilities::Archiver& archiver) const + { + archiver["inputLayer"] << _inputLayer.get(); + + std::vector*> layerElements; + for (size_t i = 0; i < _layers.size(); i++) + { + layerElements.emplace_back(_layers[i].get()); + } + archiver["layers"] << layerElements; + archiver["output"] << _output; + } + + template + void NeuralNetworkPredictor::ReadFromArchive(utilities::Unarchiver& archiver) + { + neural::LayerSerializationContext layerContext(archiver.GetContext()); + archiver.PushContext(layerContext); + + std::unique_ptr> inputLayer; + archiver["inputLayer"] >> inputLayer; + _inputLayer = std::move(inputLayer); + + std::vector*> layerElements; + archiver["layers"] >> layerElements; + _layers.resize(layerElements.size()); + for (size_t i = 0; i < layerElements.size(); i++) + { + _layers[i].reset((neural::Layer*)layerElements[i]); + } + archiver["output"] >> _output; + + archiver.PopContext(); + } + + template + void NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(utilities::SerializationContext& context) + { + using namespace ell::predictors::neural; + + context.GetTypeFactory().AddType, neural::InputLayer>(); + context.GetTypeFactory().AddType, neural::ActivationLayer>(); + context.GetTypeFactory().AddType, neural::BatchNormalizationLayer>(); + context.GetTypeFactory().AddType, neural::BiasLayer>(); + context.GetTypeFactory().AddType, neural::BinaryConvolutionalLayer>(); + 
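+        // Each concrete layer type is registered with the serialization type factory so
+        // that ReadFromArchive can reconstruct the right Layer subclass from the type
+        // name stored in the archive.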
context.GetTypeFactory().AddType, neural::ConvolutionalLayer>(); + context.GetTypeFactory().AddType, neural::FullyConnectedLayer>(); + context.GetTypeFactory().AddType, neural::PoolingLayer>(); + context.GetTypeFactory().AddType, neural::PoolingLayer>(); + context.GetTypeFactory().AddType, neural::RegionDetectionLayer>(); + context.GetTypeFactory().AddType, neural::ScalingLayer>(); + context.GetTypeFactory().AddType, neural::SoftmaxLayer>(); + context.GetTypeFactory().AddType, NeuralNetworkPredictor>(); + + // Map the old type names to the new ones for compatibility reasons. + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer>"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer>"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); + } + + template + utilities::ArchiveVersion NeuralNetworkPredictor::GetCurrentArchiveVersion() + { + return c_currentNeuralNetworkPredictorArchiveVersion; + } + + template + utilities::ArchiveVersion NeuralNetworkPredictor::GetArchiveVersion() const + { + return GetCurrentArchiveVersion(); + } +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/include/Normalizer.h b/libraries/predictors/include/Normalizer.h index bd1966fbc..bf6450daf 100644 --- a/libraries/predictors/include/Normalizer.h +++ b/libraries/predictors/include/Normalizer.h @@ -46,4 +46,31 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/Normalizer.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + template + inline Normalizer::Normalizer(TransformationType transformation) : + _transformation(transformation) + { + } + + template + template + OutputDataVectorType Normalizer::Compute(const InputDataVectorType& input) const + { + return data::TransformAs(input, _transformation); + } + + template + Normalizer MakeTransformationNormalizer(TransformationType transformation) + { + return Normalizer(transformation); + } +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/include/SignPredictor.h b/libraries/predictors/include/SignPredictor.h index 325e43ec3..84cd5cc64 100644 --- a/libraries/predictors/include/SignPredictor.h +++ b/libraries/predictors/include/SignPredictor.h @@ -75,4 +75,55 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/SignPredictor.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + template + SignPredictor MakeSignPredictor(PredictorType predictor) + { + return SignPredictor(std::move(predictor)); + } + + template + SignPredictor::SignPredictor(PredictorType predictor) : + _predictor(std::move(predictor)) + { + } + + 
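+    // Hypothetical usage sketch (not part of the original source): SignPredictor turns
+    // any real-valued predictor into a binary classifier by thresholding at zero, e.g.
+    //
+    //     auto classifier = MakeSignPredictor(LinearPredictor<double>(dim));
+    //     bool label = classifier.Predict(example); // true iff w * x + b > 0
+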
template + PredictorType& SignPredictor::GetPredictor() + { + return _predictor; + } + + template + const PredictorType& SignPredictor::GetPredictor() const + { + return _predictor; + } + + template + bool SignPredictor::Predict(const DataVectorType& dataVector) const + { + auto prediction = _predictor.Predict(dataVector); + return prediction > 0; + } + + template + void SignPredictor::WriteToArchive(utilities::Archiver& archiver) const + { + archiver["predictor"] << _predictor; + } + + template + void SignPredictor::ReadFromArchive(utilities::Unarchiver& archiver) + { + archiver["predictor"] >> _predictor; + } +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/Activation.h b/libraries/predictors/neural/include/Activation.h index bae8b00ca..1d70ca453 100644 --- a/libraries/predictors/neural/include/Activation.h +++ b/libraries/predictors/neural/include/Activation.h @@ -164,4 +164,93 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/Activation.tcc" +#pragma region implementation + +#include +#include + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + ElementType ActivationImpl::operator()(const ElementType input) const + { + return Apply(input); + } + + template + ElementType ActivationImpl::ApplyIndex(const ElementType input, const math::IntegerTriplet& /*index*/) const + { + return Apply(input); + } + + template + Activation::Activation(std::unique_ptr>& impl) : + _impl(std::move(impl)) + {} + + template + Activation::Activation(ActivationImpl* impl) : + _impl(impl) + { + } + + template + Activation::Activation(const Activation& other) : + _impl(std::move(other._impl->Copy())) + {} + + template + Activation& Activation::operator=(const Activation& other) + { + if (this != &other) + { + auto temp = other._impl->Copy(); + _impl.swap(temp); + } + return *this; + } + + template + ElementType Activation::Apply(const ElementType input) const + { + return _impl->Apply(input); + } + + template + ElementType Activation::operator()(const ElementType input) const + { + return _impl->Apply(input); + } + + template + ElementType Activation::ApplyIndex(const ElementType input, const math::IntegerTriplet& index) const + { + return _impl->ApplyIndex(input, index); + } + + template + void Activation::Apply(math::ColumnVector& input) const + { + input.Transform([this](ElementType value) { return _impl->Apply(value); }); + } + + template + void Activation::WriteToArchive(utilities::Archiver& archiver) const + { + archiver["activation"] << _impl; + } + + template + void Activation::ReadFromArchive(utilities::Unarchiver& archiver) + { + archiver["activation"] >> _impl; + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/ActivationLayer.h b/libraries/predictors/neural/include/ActivationLayer.h index b07a6db96..e7e16d0c6 100644 --- a/libraries/predictors/neural/include/ActivationLayer.h +++ b/libraries/predictors/neural/include/ActivationLayer.h @@ -77,4 +77,82 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/ActivationLayer.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + ActivationLayer::ActivationLayer(const LayerParameters& layerParameters, const ActivationType& activation) : + Layer(layerParameters), + _activation(activation) + { + 
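+            // Validate up front that the input tensor fits inside the output tensor
+            // (minus padding); activation layers are elementwise and never reshape data.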
ValidateDimensions(); + } + + template + ActivationLayer::ActivationLayer(const ActivationLayer& other) : + Layer(other), + _activation(other._activation) + { + } + + template + void ActivationLayer::ValidateDimensions() + { + auto output = GetOutputMinusPadding(); + auto& input = _layerParameters.input; + if (input.NumRows() > output.NumRows() || input.NumColumns() > output.NumColumns() || input.NumChannels() > output.NumChannels()) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Input tensor must not exceed output tensor (minus padding) dimensions for activation layer."); + } + } + + template + void ActivationLayer::Compute() + { + auto output = GetOutputMinusPadding(); + auto input = _layerParameters.input; + + for (size_t i = 0; i < input.NumRows(); i++) + { + for (size_t j = 0; j < input.NumColumns(); j++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + ElementType value = input(i, j, k); + output(i, j, k) = _activation.ApplyIndex(value, math::IntegerTriplet{ i, j, k }); + } + } + } + } + + template + void ActivationLayer::WriteToArchive(utilities::Archiver& archiver) const + { + Layer::WriteToArchive(archiver); + _activation.WriteToArchive(archiver); + } + + template + void ActivationLayer::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer::ReadFromArchive(archiver); + + if (archiver.HasNextPropertyName("activation")) + { + _activation.ReadFromArchive(archiver); + } + if (!_activation.GetImpl()) + { + _activation.LegacyReadFromArchive(archiver); + } + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/BatchNormalizationLayer.h b/libraries/predictors/neural/include/BatchNormalizationLayer.h index 08964e76c..ad9b7970c 100644 --- a/libraries/predictors/neural/include/BatchNormalizationLayer.h +++ b/libraries/predictors/neural/include/BatchNormalizationLayer.h @@ -92,4 +92,90 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/BatchNormalizationLayer.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + BatchNormalizationLayer::BatchNormalizationLayer(const LayerParameters& layerParameters, const VectorType& mean, const VectorType& variance, ElementType epsilon, EpsilonSummand epsilonSummand) : + Layer(layerParameters), + _multiplicationValues(mean.Size()), + _additionValues(variance.Size()), + _epsilon(epsilon), + _epsilonSummand(epsilonSummand) + { + if (mean.Size() != variance.Size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, GetRuntimeTypeName() + ": Size of 'mean' and 'variance' must match"); + } + if (_layerParameters.input.Size() != GetOutputMinusPadding().Size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, GetRuntimeTypeName() + ": Expected size of input and output tensor (minus padding) to match"); + } + if (mean.Size() != NumOutputChannels()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, GetRuntimeTypeName() + ": Number of 'mean' and 'variance' values must equal number of channels in output"); + } + + // Batch norm is: outputValue = (inputValue - mean) / (sqrt(variance) + _epsilon) + // To turn this into one MultiplyAdd operation, we can rearrange it to: + // EpsilonSummand::Variance: + // outputValue = inputValue * (1/(sqrt(variance + _epsilon))) + (-mean * 1/(sqrt(variance + 
_epsilon))) + // EpsilonSummand::SqrtVariance: + // outputValue = inputValue * (1/(sqrt(variance) + _epsilon)) + (-mean * 1/(sqrt(variance) + _epsilon)) + for (size_t i = 0; i < _additionValues.Size(); i++) + { + ElementType varianceFactor = (_epsilonSummand == EpsilonSummand::Variance) ? (1 / (std::sqrt(variance[i] + _epsilon))) : (1 / (std::sqrt(variance[i]) + _epsilon)); + + _multiplicationValues[i] = varianceFactor; + _additionValues[i] = -mean[i] * varianceFactor; + } + } + + template + void BatchNormalizationLayer::Compute() + { + auto output = GetOutputMinusPadding(); + auto input = _layerParameters.input; + + AssignValues(input, output); + math::ScaleAddUpdate(_multiplicationValues, _additionValues, output); + } + + template + void BatchNormalizationLayer::WriteToArchive(utilities::Archiver& archiver) const + { + Layer::WriteToArchive(archiver); + + math::VectorArchiver::Write(_multiplicationValues, "multiplicationValues", archiver); + math::VectorArchiver::Write(_additionValues, "additionValues", archiver); + + archiver["epsilon"] << _epsilon; + archiver["epsilonSummand"] << static_cast(_epsilonSummand); + } + + template + void BatchNormalizationLayer::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer::ReadFromArchive(archiver); + + math::VectorArchiver::Read(_multiplicationValues, "multiplicationValues", archiver); + math::VectorArchiver::Read(_additionValues, "additionValues", archiver); + + archiver["epsilon"] >> _epsilon; + + int value; + archiver["epsilonSummand"] >> value; + _epsilonSummand = static_cast(value); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/BiasLayer.h b/libraries/predictors/neural/include/BiasLayer.h index bdf6fcbe3..c0ab5869a 100644 --- a/libraries/predictors/neural/include/BiasLayer.h +++ b/libraries/predictors/neural/include/BiasLayer.h @@ -73,4 +73,58 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/BiasLayer.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + + template + BiasLayer::BiasLayer(const LayerParameters& layerParameters, const VectorType& bias) : + Layer(layerParameters), + _bias(bias) + { + if (this->GetInputShape() != this->GetOutputShapeMinusPadding()) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, GetRuntimeTypeName() + ": Expected size of input and output tensor (minus padding) to match"); + } + if (_bias.Size() != NumOutputChannels()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, GetRuntimeTypeName() + ": Number of 'bias' values must equal number of channels in output"); + } + } + + template + void BiasLayer::Compute() + { + auto output = GetOutputMinusPadding(); + auto input = _layerParameters.input; + + AssignValues(input, output); + math::AddUpdate(_bias, output); + } + + template + void BiasLayer::WriteToArchive(utilities::Archiver& archiver) const + { + Layer::WriteToArchive(archiver); + + math::VectorArchiver::Write(_bias, "bias", archiver); + } + + template + void BiasLayer::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer::ReadFromArchive(archiver); + + math::VectorArchiver::Read(_bias, "bias", archiver); + } + + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/BinaryConvolutionalLayer.h 
b/libraries/predictors/neural/include/BinaryConvolutionalLayer.h index ee5f27fb3..72d2a5eac 100644 --- a/libraries/predictors/neural/include/BinaryConvolutionalLayer.h +++ b/libraries/predictors/neural/include/BinaryConvolutionalLayer.h @@ -171,4 +171,496 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/BinaryConvolutionalLayer.tcc" \ No newline at end of file +#pragma region implementation + +// TODO: let's make a popcount function that does the right thing +#if defined(_MSC_VER) +#include +#define POPCOUNT64 __popcnt64 +#else +#define POPCOUNT64 __builtin_popcountl +#endif + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + BinaryConvolutionalLayer::BinaryConvolutionalLayer(const LayerParameters& layerParameters, const BinaryConvolutionalParameters& convolutionalParameters, const ConstTensorReferenceType& weights) : + Layer(layerParameters), + _convolutionalParameters(convolutionalParameters), + _realValuedShapedInputMatrix(0, 0), + _realValuedWeightsMatrix(0, 0), + _realValuedOutputMatrix(0, 0) + { + if (weights.GetConstDataPointer() == nullptr) + { + throw utilities::InputException(utilities::InputExceptionErrors::nullReference, "weights tensor has null data field"); + } + + if (weights.Size() != (NumOutputChannels() * _layerParameters.input.NumChannels() * convolutionalParameters.receptiveField * convolutionalParameters.receptiveField)) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "weights dimensions for a convolutional layer should be the size of the receptive field volume * number of filters"); + } + + ComputeWeightsMatrices(weights); + InitializeIOMatrices(); + ComputeShapedInputPaddingMask(); + } + + template + void BinaryConvolutionalLayer::ComputeWeightsMatrices(const ConstTensorReferenceType& weights) + { + const auto filterWidth = _convolutionalParameters.receptiveField; + + _binarizedWeights.resize(NumOutputChannels()); + _filterMeans.resize(NumOutputChannels()); + _realValuedWeightsMatrix = MatrixType(NumOutputChannels(), filterWidth * filterWidth * _layerParameters.input.NumChannels()); + + const size_t binarizedFilterVolumeSize = ((filterWidth * filterWidth * _layerParameters.input.NumChannels()) + (_binaryElementSize - 1)) / _binaryElementSize; + + // Binarize the weights and calculate the mean per filter + auto flattened = weights.ReferenceAsMatrix(); + for (size_t startRow = 0; startRow < flattened.NumRows() / filterWidth; ++startRow) + { + // Iterate over the weights corresponding to the filter and calculate the mean + ElementType sum = 0; + std::vector filterWeights(filterWidth * filterWidth * _layerParameters.input.NumChannels()); + for (size_t row = 0; row < filterWidth; row++) + { + auto weightsVector = flattened.GetMajorVector(startRow * filterWidth + row); + + for (size_t i = 0; i < weightsVector.Size(); ++i) + { + const size_t columnOffset = row * weightsVector.Size(); + ElementType value = weightsVector[i]; + + sum += std::abs(value); + filterWeights[columnOffset + i] = value; + } + } + + ElementType mean = sum / static_cast(filterWeights.size()); + _filterMeans[startRow] = mean; + + // initialize the mean according to the binary weights scale + ElementType scale(1.0); + if (_convolutionalParameters.weightsScale == BinaryWeightsScale::mean) + { + scale = mean; + } + + for (size_t i = 0; i < filterWeights.size(); ++i) + { + // Set the weights matrix based on the weights value and mean + _realValuedWeightsMatrix(startRow, i) = (filterWeights[i] > 0) ? 
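+                    // Binarization in the style of XNOR-Net: each weight keeps only its
+                    // sign, optionally scaled by the filter's mean absolute weight so the
+                    // binary filter approximates the real-valued one.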
scale : -scale; + } + + // Binarize and pack the weights + _binarizedWeights[startRow].resize(binarizedFilterVolumeSize, 0); + for (size_t i = 0; i < filterWeights.size(); ++i) + { + size_t block = i / _binaryElementSize; + int bit = i % _binaryElementSize; + if (filterWeights[i] > 0) + { + _binarizedWeights[startRow][block] |= ((uint64_t)1 << bit); + } + } + } + } + + template + void BinaryConvolutionalLayer::InitializeIOMatrices() + { + const auto filterWidth = _convolutionalParameters.receptiveField; + const auto outputShape = NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding(); + + _realValuedShapedInputMatrix = { filterWidth * filterWidth * _layerParameters.input.NumChannels(), outputShape }; + _realValuedOutputMatrix = { NumOutputChannels(), outputShape }; + + _binarizedShapedInput.resize(outputShape); + _shapedInputPaddingMask.resize(outputShape); + _shapedInputPaddingMaskSums.resize(outputShape); + // Set the sizes of the shapedInput and padding mask vectors + const size_t binarizedFilterVolumeSize = ((filterWidth * filterWidth * _layerParameters.input.NumChannels()) - 1) / _binaryElementSize + 1; + for (size_t i = 0; i < _binarizedShapedInput.size(); ++i) + { + _binarizedShapedInput[i].resize(binarizedFilterVolumeSize, 0); + _shapedInputPaddingMask[i].resize(binarizedFilterVolumeSize, 0); + } + } + + template + void BinaryConvolutionalLayer::Compute() + { + auto output = GetOutputMinusPadding(); + auto input = _layerParameters.input; + + if (_convolutionalParameters.method == BinaryConvolutionMethod::gemm) + { + // Re-shape input. + ReceptiveFieldToColumns(input, _realValuedShapedInputMatrix); + + // Multiply reshaped input and weights. + math::MultiplyScaleAddUpdate(static_cast(1.0), _realValuedWeightsMatrix, _realValuedShapedInputMatrix, static_cast(0.0), _realValuedOutputMatrix); + + // Re-shape the output into the output tensor + for (size_t i = 0; i < output.NumRows(); ++i) + { + for (size_t j = 0; j < output.NumColumns(); ++j) + { + for (size_t k = 0; k < output.NumChannels(); ++k) + { + size_t row = k; + size_t column = (i * output.NumColumns()) + j; + output(i, j, k) = _realValuedOutputMatrix(row, column); + } + } + } + } + else + { + // Use the bitwise method + // Binarize and pack the input + ReceptiveFieldToBinaryRows(input, _binarizedShapedInput); + + // XOR and sum + const size_t filterSize = _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * input.NumChannels(); + const size_t binarizedFilterSize = _binarizedWeights[0].size(); + const size_t filterDrop = filterSize % _binaryElementSize; + const size_t filterAdjust = _binaryElementSize - filterDrop; + + // Iterate over filters + for (size_t i = 0; i < output.NumRows(); ++i) + { + size_t shapedInputOffset = i * NumOutputColumnsMinusPadding(); + for (size_t j = 0; j < output.NumColumns(); ++j) + { + for (size_t k = 0; k < output.NumChannels(); ++k) + { + ElementType sum = 0; + + auto& binarizedWeights = _binarizedWeights[k]; + auto& binarizedShapedInput = _binarizedShapedInput[shapedInputOffset + j]; + auto& shapedInputPaddingMask = _shapedInputPaddingMask[shapedInputOffset + j]; + + for (size_t blockIndex = 0; blockIndex < binarizedFilterSize; blockIndex++) + { + const uint64_t fValue = binarizedWeights[blockIndex]; + const uint64_t iValue = binarizedShapedInput[blockIndex]; + + if (HasInputZeroPadding()) + { + // Zeros are neither -1 nor 1, mask out the effects + // of zero padding from the XOR product + // This logic is only applied to zero padding where the effect + // of 
inserting zeros is well-known, other padding + // schemes that can generate zero values are not special-cased. + const uint64_t maskValue = shapedInputPaddingMask[blockIndex]; + const uint64_t xorProduct = maskValue & (fValue ^ iValue); + + // Apply the actual zero padding, which is to "add back" the number of values + // that were assumed to be -1 + sum += (2.0f * POPCOUNT64(xorProduct) - _binaryElementSize + POPCOUNT64(~maskValue)); + } + else + { + const uint64_t xorProduct = fValue ^ iValue; + sum += (2.0f * POPCOUNT64(xorProduct) - _binaryElementSize); + } + } + + ElementType scale(1.0); + if (_convolutionalParameters.weightsScale == BinaryWeightsScale::mean) + { + scale = _filterMeans[k]; + } + + if (filterDrop == 0) + { + output(i, j, k) = (-scale * sum); + } + else + { + output(i, j, k) = (-scale * (sum + filterAdjust)); + } + } + } + } + } + } + + // Fills a vector of vectors where each row is the values of the receptive field from the input stretched into a vector, + // and the number of vectors is equal to the number of locations that a receptive field is slid over the input volume. + template + void BinaryConvolutionalLayer::ReceptiveFieldToBinaryRows(ConstTensorReferenceType input, std::vector>& shapedInput) + { + const size_t fieldVolumeSize = _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(); + const size_t outputHeight = NumOutputRowsMinusPadding(); + const size_t outputWidth = NumOutputColumnsMinusPadding(); + const size_t rowMax = outputWidth * outputHeight; + + for (size_t outRow = 0; outRow < rowMax; ++outRow) + { + const size_t convolutionalRow = outRow / outputWidth; + const size_t convolutionalCol = outRow % outputWidth; + const size_t horizontalStart = (convolutionalCol * _convolutionalParameters.stride); + const size_t verticalStart = (convolutionalRow * _convolutionalParameters.stride); + + for (size_t f = 0; f < fieldVolumeSize; ++f) + { + // Calculate the col, row, depth values in the convolutional field volume + const size_t volDepth = f % input.NumChannels(); + const size_t volCol = (f / input.NumChannels()) % _convolutionalParameters.receptiveField; + const size_t volRow = (f / input.NumChannels()) / _convolutionalParameters.receptiveField; + + // Calculate where this fits in relation to the input volume + const intptr_t sourceCol = horizontalStart + volCol; + const intptr_t sourceRow = verticalStart + volRow; + const intptr_t sourceDepth = volDepth; + + ElementType value = input(sourceRow, sourceCol, sourceDepth); + const size_t block = (f / _binaryElementSize); + const size_t bit = f % _binaryElementSize; + + if (bit == 0) + { + // Initialize to zero + shapedInput[outRow][block] = static_cast(0); + } + + // Set the bit value + if (value > 0) + { + shapedInput[outRow][block] += ((uint64_t)1 << bit); + } + } + } + } + + template + void BinaryConvolutionalLayer::ReceptiveFieldToColumns(ConstTensorReferenceType input, MatrixType& shapedInput) + { + const size_t fieldVolumeSize = _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(); + const size_t convolutionalHeight = NumOutputRowsMinusPadding(); + const size_t convolutionalWidth = NumOutputColumnsMinusPadding(); + + for (size_t f = 0; f < fieldVolumeSize; ++f) + { + const size_t fieldDepth = f % _layerParameters.input.NumChannels(); + const size_t fieldColumn = (f / _layerParameters.input.NumChannels()) % _convolutionalParameters.receptiveField; + const size_t fieldRow = (f 
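+                // (the flattened field index f is unpacked channel-fastest: depth is
+                // f % NumChannels, then column, then row within the receptive field)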
/ _layerParameters.input.NumChannels()) / _convolutionalParameters.receptiveField; + + size_t rowOffset = 0; + for (size_t h = 0; h < convolutionalHeight; ++h) + { + size_t colOffset = 0; + for (size_t w = 0; w < convolutionalWidth; ++w) + { + size_t inputRow = rowOffset + fieldRow; + size_t inputCol = colOffset + fieldColumn; + + ElementType value = input(inputRow, inputCol, fieldDepth); + + // Don't binarize zero-padded input when weights are not scaled + if (IsInputZeroPadding(inputRow, inputCol)) + { + shapedInput(f, h * convolutionalWidth + w) = value; + } + else + { + shapedInput(f, h * convolutionalWidth + w) = (value > 0) ? 1.0f : -1.0f; + } + + colOffset += _convolutionalParameters.stride; + } + rowOffset += _convolutionalParameters.stride; + } + } + } + + template + bool BinaryConvolutionalLayer::HasInputZeroPadding() const + { + return HasPadding(_layerParameters.inputPaddingParameters, PaddingScheme::zeros); + } + + template + bool BinaryConvolutionalLayer::IsInputZeroPadding(size_t row, size_t column) const + { + if (HasInputZeroPadding()) + { + const size_t paddingSize = _layerParameters.inputPaddingParameters.paddingSize; + const size_t rowPaddingRightIndex = _layerParameters.input.NumRows() - paddingSize; + const size_t columnPaddingRightIndex = _layerParameters.input.NumColumns() - paddingSize; + + return row < paddingSize || row >= rowPaddingRightIndex || + column < paddingSize || column >= columnPaddingRightIndex; + } + + return false; + } + + template + void BinaryConvolutionalLayer::WriteToArchive(utilities::Archiver& archiver) const + { + const size_t filterWidth = _convolutionalParameters.receptiveField; + const size_t binarizedFilterVolumeSize = ((filterWidth * filterWidth * _layerParameters.input.NumChannels()) + (_binaryElementSize - 1)) / _binaryElementSize; + + Layer::WriteToArchive(archiver); + + archiver["receptiveField"] << _convolutionalParameters.receptiveField; + archiver["stride"] << _convolutionalParameters.stride; + archiver["method"] << static_cast(_convolutionalParameters.method); + archiver["weightsScale"] << static_cast(_convolutionalParameters.weightsScale); + + // Compute binarized weights + size_t numRows = _realValuedWeightsMatrix.NumRows(); + size_t numCols = _realValuedWeightsMatrix.NumColumns(); + std::vector> binarizedWeights(numRows); + for (size_t rowIndex = 0; rowIndex < numRows; ++rowIndex) + { + binarizedWeights[rowIndex].resize(binarizedFilterVolumeSize, 0); + for (size_t colIndex = 0; colIndex < numCols; ++colIndex) + { + size_t block = colIndex / _binaryElementSize; + int bit = colIndex % _binaryElementSize; + if (_realValuedWeightsMatrix(rowIndex, colIndex) > 0) + { + binarizedWeights[rowIndex][block] |= ((uint64_t)1 << bit); + } + } + } + + std::vector temp; + archiver["binarizedWeights_numVectors"] << binarizedWeights.size(); + for (size_t i = 0; i < binarizedWeights.size(); ++i) + { + temp.insert(temp.end(), binarizedWeights[i].begin(), binarizedWeights[i].end()); + } + archiver["binarizedWeights_values"] << temp; + temp.clear(); + archiver["filterMeans"] << _filterMeans; + } + + template + void BinaryConvolutionalLayer::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer::ReadFromArchive(archiver); + + archiver["receptiveField"] >> _convolutionalParameters.receptiveField; + archiver["stride"] >> _convolutionalParameters.stride; + int method; + archiver["method"] >> method; + _convolutionalParameters.method = static_cast(method); + int weightsScale; + archiver["weightsScale"] >> weightsScale; + 
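+            // Enum-valued parameters are archived as plain ints (see WriteToArchive above)
+            // and cast back to their enum types on read.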
_convolutionalParameters.weightsScale = static_cast(weightsScale); + + size_t numVectors = 0; + std::vector temp; + const size_t binarizedFilterVolumeSize = ((_convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels()) + (_binaryElementSize - 1)) / _binaryElementSize; + archiver["binarizedWeights_numVectors"] >> numVectors; + archiver["binarizedWeights_values"] >> temp; + _binarizedWeights.resize(numVectors); + for (size_t i = 0; i < _binarizedWeights.size(); ++i) + { + _binarizedWeights[i].resize(binarizedFilterVolumeSize, 0); + for (size_t j = 0; j < binarizedFilterVolumeSize; ++j) + { + _binarizedWeights[i][j] = temp[i * binarizedFilterVolumeSize + j]; + } + } + archiver["filterMeans"] >> _filterMeans; + + ComputeRealValuedWeightsMatrix(); + InitializeIOMatrices(); + ComputeShapedInputPaddingMask(); + } + + template + void BinaryConvolutionalLayer::ComputeRealValuedWeightsMatrix() + { + const auto filterWidth = _convolutionalParameters.receptiveField; + const auto numWeightsColumns = filterWidth * filterWidth * _layerParameters.input.NumChannels(); + const size_t binarizedFilterVolumeSize = (numWeightsColumns - 1) / _binaryElementSize + 1; + + _realValuedWeightsMatrix = { NumOutputChannels(), numWeightsColumns }; + for (size_t rowIndex = 0; rowIndex < _binarizedWeights.size(); ++rowIndex) + { + size_t colIndex = 0; + assert(binarizedFilterVolumeSize == _binarizedWeights[rowIndex].size()); + for (size_t blockIndex = 0; blockIndex < binarizedFilterVolumeSize; blockIndex++) + { + const auto bits = _binarizedWeights[rowIndex][blockIndex]; + const auto filterMean = _filterMeans[rowIndex]; + + ElementType scale(1.0); + if (_convolutionalParameters.weightsScale == BinaryWeightsScale::mean) + { + scale = filterMean; + } + + for (size_t bitIndex = 0; bitIndex < _binaryElementSize && colIndex < numWeightsColumns; ++bitIndex, ++colIndex) + { + const auto bitVal = (bits >> bitIndex) & 0x01; + _realValuedWeightsMatrix(rowIndex, colIndex) = bitVal == 0 ? 
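+                        // The gemm path needs real-valued weights, so after dearchiving they
+                        // are reconstructed from the packed sign bits: set bit -> +scale,
+                        // clear bit -> -scale.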
-scale : scale; + } + } + } + } + + template + void BinaryConvolutionalLayer::ComputeShapedInputPaddingMask() + { + const size_t fieldVolumeSize = _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(); + const size_t outputHeight = NumOutputRowsMinusPadding(); + const size_t outputWidth = NumOutputColumnsMinusPadding(); + const size_t rowMax = outputWidth * outputHeight; + + for (size_t outRow = 0; outRow < rowMax; ++outRow) + { + const size_t convolutionalRow = outRow / outputWidth; + const size_t convolutionalCol = outRow % outputWidth; + const size_t horizontalStart = (convolutionalCol * _convolutionalParameters.stride); + const size_t verticalStart = (convolutionalRow * _convolutionalParameters.stride); + int maskSum = 0; + + for (size_t f = 0; f < fieldVolumeSize; ++f) + { + // Calculate the col, row, and depth values in the convolutional field volume + const size_t volCol = (f / _layerParameters.input.NumChannels()) % _convolutionalParameters.receptiveField; + const size_t volRow = (f / _layerParameters.input.NumChannels()) / _convolutionalParameters.receptiveField; + + // Calculate where this fits in relation to the input volume + const intptr_t sourceCol = horizontalStart + volCol; + const intptr_t sourceRow = verticalStart + volRow; + + const size_t block = f / _binaryElementSize; + const size_t bit = f % _binaryElementSize; + + if (bit == 0) + { + // Initialize to ones + _shapedInputPaddingMask[outRow][block] = std::numeric_limits::max(); + } + + // Set the mask for zero padding, so that the effect of these + // on the bitwise operation is removed + if (IsInputZeroPadding(sourceRow, sourceCol)) + { + _shapedInputPaddingMask[outRow][block] -= ((uint64_t)1 << bit); + maskSum += 1; + } + } + _shapedInputPaddingMaskSums[outRow] = maskSum; + } + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/ConvolutionalLayer.h b/libraries/predictors/neural/include/ConvolutionalLayer.h index b5a50d9c5..b4f8e5714 100644 --- a/libraries/predictors/neural/include/ConvolutionalLayer.h +++ b/libraries/predictors/neural/include/ConvolutionalLayer.h @@ -142,4 +142,282 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/ConvolutionalLayer.tcc" +#pragma region implementation + +// #include +#include +#include +#include + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + ConvolutionalLayer::ConvolutionalLayer(const LayerParameters& layerParameters, const ConvolutionalParameters& convolutionalParameters, TensorType weights) : + Layer(layerParameters), + _convolutionalParameters(convolutionalParameters), + _weights(std::move(weights)), + _shapedInput{ _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(), NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding() }, + _weightsMatrix(_layerParameters.outputShape.NumChannels(), _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels()), + _outputMatrix{ NumOutputChannels(), NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding() }, + _originalConvolutionMethod(convolutionalParameters.method) + { + if (_weights.GetDataPointer() == nullptr) + { + throw utilities::InputException(utilities::InputExceptionErrors::nullReference, "weights tensor has null data field"); + } + + _isDepthwiseSeparable = 
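+            // A depthwise-separable layer is detected by its weights tensor having a single
+            // channel while the input has several; each filter then convolves one input channel.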
(_weights.NumChannels() == 1) && (_layerParameters.input.NumChannels() > 1); + if (_isDepthwiseSeparable && (_output.NumChannels() != _layerParameters.input.NumChannels())) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Input and output channel sizes must match for a depthwise-separable convolutional layer"); + } + else if (!_isDepthwiseSeparable && (_weights.Size() != (_output.NumChannels() * _layerParameters.input.NumChannels() * convolutionalParameters.receptiveField * convolutionalParameters.receptiveField))) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "weights dimensions for a convolutional layer should be the size of the receptive field volume * number of filters"); + } + + CalculateConvolutionMethod(); + ComputeWeightsMatrix(); + } + + template + void ConvolutionalLayer::Compute() + { + auto output = GetOutputMinusPadding(); + auto& input = _layerParameters.input; + auto stride = static_cast(_convolutionalParameters.stride); + + if (!_isDepthwiseSeparable) + { + switch (_convolutionalParameters.method) + { + case ConvolutionMethod::simple: + { + const int numFilters = static_cast(output.NumChannels()); + dsp::Convolve2DSimple(input, _weights, numFilters, stride, output); + } + break; + case ConvolutionMethod::unrolled: + { + const int numFilters = static_cast(output.NumChannels()); + auto result = dsp::Convolve2DUnrolled(input, _weights, numFilters, stride); + output.CopyFrom(result); + } + break; + case ConvolutionMethod::winograd: + { + assert(stride == 1); + const int numFilters = static_cast(output.NumChannels()); + auto result = dsp::Convolve2DWinograd(input, _weights, numFilters); + output.CopyFrom(result); + } + break; + case ConvolutionMethod::diagonal: + { + // Use the Diagonal method + + // Flatten the input + auto inputMatrix = input.ReferenceAsMatrix(); + + const size_t depth = input.NumChannels(); + const size_t kt = _convolutionalParameters.receptiveField * depth; + const size_t paddingSize = _layerParameters.inputPaddingParameters.paddingSize; + const size_t numConvolutions = (inputMatrix.NumColumns() - kt) / depth + 1; + const size_t numFiltersAtAtime = _convolutionalParameters.numFiltersAtATime; + const size_t numFilters = _layerParameters.outputShape.NumChannels(); + auto weightsMatrix = _weights.ReferenceAsMatrix().Transpose(); + + for (size_t j = 0; j < numConvolutions; j++) + { + // Get the sub matrix for Vj + auto Vj = inputMatrix.GetSubMatrix(0, j * depth, inputMatrix.NumRows(), kt); + + for (size_t filterStart = 0; filterStart < numFilters; filterStart += numFiltersAtAtime) + { + size_t numFiltersToUse = std::min(numFiltersAtAtime, numFilters - filterStart); + + auto Wl = weightsMatrix.GetSubMatrix(0, filterStart * _convolutionalParameters.receptiveField, weightsMatrix.NumRows(), numFiltersToUse * _convolutionalParameters.receptiveField); + + MatrixType A(Vj.NumRows(), _convolutionalParameters.receptiveField * numFiltersToUse); + + math::MultiplyScaleAddUpdate(static_cast(1.0), Vj, Wl, static_cast(0.0), A); + + for (size_t l = 0; l < numFiltersToUse; l++) + { + for (size_t row = 0; row < (A.NumRows() - 2 * paddingSize); row++) + { + ElementType sum = 0.0; + for (size_t diagonal = 0; diagonal < _convolutionalParameters.receptiveField; diagonal++) + { + sum += A(row + diagonal, l * _convolutionalParameters.receptiveField + diagonal); + } + output(row, j, filterStart + l) = sum; + } + } + } + } + } + break; + + default: + throw 
utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Convolution method not supported"); + } + } + else // if _isDepthwiseSeparable + { + const int numFilters = 1; + const size_t numInputRows = input.NumRows(); + const size_t numInputColumns = input.NumColumns(); + const size_t numOutputRows = output.NumRows(); + const size_t numOutputColumns = output.NumColumns(); + const size_t filterRows = _convolutionalParameters.receptiveField; + + for (size_t channel = 0; channel < output.NumChannels(); ++channel) + { + using TensorType = typename Layer::TensorType; + using TensorReferenceType = typename Layer::TensorReferenceType; + + TensorType weights(_weights.GetSubTensor(filterRows * channel, 0, 0, filterRows, filterRows, 1)); + const auto& inputChannelTensor = input.GetSubTensor(0, 0, channel, numInputRows, numInputColumns, 1); + TensorReferenceType outputChannelTensor = output.GetSubTensor(0, 0, channel, numOutputRows, numOutputColumns, 1); + + switch (_convolutionalParameters.method) + { + case ConvolutionMethod::simple: + { + auto result = dsp::Convolve2DSimpleDepthwiseSeparable(inputChannelTensor, weights, numFilters, stride); + outputChannelTensor.CopyFrom(result); + } + break; + case ConvolutionMethod::unrolled: + { + auto result = dsp::Convolve2DUnrolled(inputChannelTensor, weights, numFilters, stride); + outputChannelTensor.CopyFrom(result); + } + break; + case ConvolutionMethod::winograd: + { + auto result = dsp::Convolve2DWinogradDepthwiseSeparable(inputChannelTensor, weights, numFilters, stride); + outputChannelTensor.CopyFrom(result); + } + break; + default: + throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Convolution method not supported for depthwise separable convolution"); + } + } + } + } + + template + void ConvolutionalLayer::WriteToArchive(utilities::Archiver& archiver) const + { + Layer::WriteToArchive(archiver); + + archiver["receptiveField"] << _convolutionalParameters.receptiveField; + archiver["stride"] << _convolutionalParameters.stride; + archiver["method"] << static_cast(_originalConvolutionMethod); + archiver["numFiltersAtATime"] << static_cast(_convolutionalParameters.numFiltersAtATime); + + math::TensorArchiver::Write(_weights, "weights", archiver); + } + + template + void ConvolutionalLayer::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer::ReadFromArchive(archiver); + + archiver["receptiveField"] >> _convolutionalParameters.receptiveField; + archiver["stride"] >> _convolutionalParameters.stride; + int method; + archiver["method"] >> method; + _originalConvolutionMethod = static_cast(method); + int numFilters; + archiver["numFiltersAtATime"] >> numFilters; + _convolutionalParameters.numFiltersAtATime = static_cast(numFilters); + + math::TensorArchiver::Read(_weights, "weights", archiver); + _isDepthwiseSeparable = (_weights.NumChannels() == 1) && (_layerParameters.input.NumChannels() > 1); + CalculateConvolutionMethod(); + ComputeWeightsMatrix(); + InitializeIOMatrices(); + } + + template + void ConvolutionalLayer::ComputeWeightsMatrix() + { + if (_convolutionalParameters.method == ConvolutionMethod::unrolled) + { + _weightsMatrix = { _layerParameters.outputShape.NumChannels(), _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels() }; + // Use the unrolled method + // Reshape the weights + auto flattened = _weights.ReferenceAsMatrix(); + for (size_t startRow = 0; startRow < flattened.NumRows() / 
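+            // For the unrolled method, each filter's receptive-field volume is flattened
+            // into one row of _weightsMatrix, reducing the whole layer to a matrix multiply.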
_convolutionalParameters.receptiveField; startRow++) + { + for (size_t row = 0; row < _convolutionalParameters.receptiveField; row++) + { + auto weightsVector = flattened.GetMajorVector(startRow * _convolutionalParameters.receptiveField + row); + for (size_t i = 0; i < weightsVector.Size(); i++) + { + const size_t columnOffset = row * weightsVector.Size(); + _weightsMatrix(startRow, columnOffset + i) = weightsVector[i]; + } + } + } + } + } + + template + void ConvolutionalLayer::InitializeIOMatrices() + { + _shapedInput = { _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(), NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding() }; + _outputMatrix = { NumOutputChannels(), NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding() }; + } + + template + void ConvolutionalLayer::CalculateConvolutionMethod() + { + _convolutionalParameters.method = _originalConvolutionMethod; + switch (_convolutionalParameters.method) + { + case ConvolutionMethod::automatic: + _convolutionalParameters.method = _isDepthwiseSeparable ? ConvolutionMethod::simple : ConvolutionMethod::unrolled; + break; + case ConvolutionMethod::simple: + case ConvolutionMethod::unrolled: // fallthrough + // do nothing + break; + case ConvolutionMethod::diagonal: + // Verify that we meet the criteria for doing Diagonal method. If not, + // choose the normal method. + if ((_convolutionalParameters.receptiveField % 2 == 0) || _convolutionalParameters.stride != 1) + { + _convolutionalParameters.method = _isDepthwiseSeparable ? ConvolutionMethod::simple : ConvolutionMethod::unrolled; + } + break; + case ConvolutionMethod::winograd: + // Verify that we meet the criteria for doing Winograd method. If not, + // choose the normal method. + if (_convolutionalParameters.stride != 1 || _convolutionalParameters.receptiveField != 3) + { + _convolutionalParameters.method = _isDepthwiseSeparable ? ConvolutionMethod::simple : ConvolutionMethod::unrolled; + } + break; + } + if (_isDepthwiseSeparable) + { + // Verify we can use a workable method for depthwise separable convolutions. 
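+                // Simple, unrolled and winograd all have depthwise-separable implementations
+                // in Compute(); anything else falls back to the simple method.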
+ if ((_convolutionalParameters.method != ConvolutionMethod::unrolled) && (_convolutionalParameters.method != ConvolutionMethod::simple) && (_convolutionalParameters.method != ConvolutionMethod::winograd)) + { + _convolutionalParameters.method = ConvolutionMethod::simple; + } + } + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/FullyConnectedLayer.h b/libraries/predictors/neural/include/FullyConnectedLayer.h index 38bec53e9..69801e129 100644 --- a/libraries/predictors/neural/include/FullyConnectedLayer.h +++ b/libraries/predictors/neural/include/FullyConnectedLayer.h @@ -91,4 +91,112 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/FullyConnectedLayer.tcc" \ No newline at end of file +#pragma region implementation + +#include + +namespace ell +{ +namespace predictors +{ + namespace neural + { + + template + FullyConnectedLayer::FullyConnectedLayer(const LayerParameters& layerParameters, ConstMatrixReferenceType& weights) : + Layer(layerParameters), + _weights(weights.NumRows(), weights.NumColumns()), + _shapedInput(layerParameters.input.Size()), + _outputVector(GetOutputMinusPadding().Size()) + { + _weights = weights; + if (_weights.NumRows() != GetOutputMinusPadding().Size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + ell::utilities::FormatString("FullyConnectedLayer weights has %d row, but expecting %d based on output size minus padding", + _weights.NumRows(), + GetOutputMinusPadding().Size())); + } + } + + template + FullyConnectedLayer::FullyConnectedLayer(const LayerParameters& layerParameters, ConstTensorReferenceType& weights) : + Layer(layerParameters), + _weights(GetOutputMinusPadding().Size(), layerParameters.input.Size(), weights.ToArray()), + _shapedInput(layerParameters.input.Size()), + _outputVector(GetOutputMinusPadding().Size()) + { + if (weights.Size() != GetOutputMinusPadding().Size() * layerParameters.input.Size()) + { + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, + ell::utilities::FormatString("FullyConnectedLayer weights size %d does not match output size (minus padding) of %d times input size %d", + _weights.NumRows(), + GetOutputMinusPadding().Size(), + layerParameters.input.Size())); + } + } + + template + void FullyConnectedLayer::Compute() + { + auto output = GetOutputMinusPadding(); + auto& input = _layerParameters.input; + + // Reshape the input into a vector + size_t columnIndex = 0; + for (size_t i = 0; i < input.NumRows(); i++) + { + for (size_t j = 0; j < input.NumColumns(); j++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + _shapedInput[columnIndex++] = input(i, j, k); + } + } + } + + math::MultiplyScaleAddUpdate((ElementType)1.0f, _weights, _shapedInput, (ElementType)0.0f, _outputVector); + + // Reshape the output + columnIndex = 0; + for (size_t i = 0; i < output.NumRows(); i++) + { + for (size_t j = 0; j < output.NumColumns(); j++) + { + for (size_t k = 0; k < output.NumChannels(); k++) + { + output(i, j, k) = _outputVector[columnIndex++]; + } + } + } + } + + template + const typename FullyConnectedLayer::MatrixType& FullyConnectedLayer::GetWeights() const + { + return _weights; + } + + template + void FullyConnectedLayer::WriteToArchive(utilities::Archiver& archiver) const + { + Layer::WriteToArchive(archiver); + + math::MatrixArchiver::Write(_weights, "weights", archiver); + } + + template + void 
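+        // The shaped-input and output working buffers are not archived; ReadFromArchive
+        // re-derives their sizes from the restored layer parameters.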
FullyConnectedLayer<ElementType>::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer<ElementType>::ReadFromArchive(archiver); + + math::MatrixArchiver::Read(_weights, "weights", archiver); + _shapedInput.Resize(_layerParameters.input.Size()); + _outputVector.Resize(GetOutputMinusPadding().Size()); + } + + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/HardSigmoidActivation.h b/libraries/predictors/neural/include/HardSigmoidActivation.h index e9fb1368e..f01429446 100644 --- a/libraries/predictors/neural/include/HardSigmoidActivation.h +++ b/libraries/predictors/neural/include/HardSigmoidActivation.h @@ -49,4 +49,28 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/HardSigmoidActivation.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template <typename ElementType> + ElementType HardSigmoidActivation<ElementType>::Apply(const ElementType input) const + { + ElementType output = (static_cast<ElementType>(0.2) * input) + static_cast<ElementType>(0.5); + return output < static_cast<ElementType>(0) ? static_cast<ElementType>(0) : (output > static_cast<ElementType>(1) ? static_cast<ElementType>(1) : output); + } + + template <typename ElementType> + std::unique_ptr<ActivationImpl<ElementType>> HardSigmoidActivation<ElementType>::Copy() const + { + return std::make_unique<HardSigmoidActivation<ElementType>>(); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/InputLayer.h b/libraries/predictors/neural/include/InputLayer.h index f72ce44a4..dafc73400 100644 --- a/libraries/predictors/neural/include/InputLayer.h +++ b/libraries/predictors/neural/include/InputLayer.h @@ -119,4 +119,99 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/InputLayer.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + + template <typename ElementType> + InputLayer<ElementType>::InputLayer(const InputParameters& inputParameters) : + Layer<ElementType>(LayerParameters{ TensorType(1, 1, 1), inputParameters.inputPaddingParameters, inputParameters.outputShape, inputParameters.outputPaddingParameters }), + _scale(NumOutputChannels()), + _data(inputParameters.inputShape) + { + _layerParameters.input = _data; + _scale.Fill(inputParameters.scale); + } + + template <typename ElementType> + void InputLayer<ElementType>::SetInput(const DataVectorType& input) + { + size_t index = 0; + auto& inputTensor = _data; + + for (size_t i = 0; i < inputTensor.NumRows(); ++i) + { + for (size_t j = 0; j < inputTensor.NumColumns(); ++j) + { + for (size_t k = 0; k < inputTensor.NumChannels(); ++k) + { + inputTensor(i, j, k) = static_cast<ElementType>(input[index++]); + } + } + } + } + + template <typename ElementType> + void InputLayer<ElementType>::SetInput(const std::vector<ElementType>& input) + { + size_t index = 0; + auto& inputTensor = _data; + + for (size_t i = 0; i < inputTensor.NumRows(); ++i) + { + for (size_t j = 0; j < inputTensor.NumColumns(); ++j) + { + for (size_t k = 0; k < inputTensor.NumChannels(); ++k) + { + inputTensor(i, j, k) = static_cast<ElementType>(input[index++]); + } + } + } + } + + template <typename ElementType> + void InputLayer<ElementType>::Compute() + { + auto output = GetOutputMinusPadding(); + auto& input = _layerParameters.input; + + AssignValues(input, output); + math::ScaleUpdate(_scale, output); + } + + template <typename ElementType> + void InputLayer<ElementType>::WriteToArchive(utilities::Archiver& archiver) const + { + Layer<ElementType>::WriteToArchive(archiver); + + math::TensorArchiver::Write(_data, "data", archiver); + if (_scale.Size() > 0) + archiver["scale"] << _scale[0]; + else + archiver["scale"] << 1; + } + + template <typename ElementType> + void 
InputLayer<ElementType>::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer<ElementType>::ReadFromArchive(archiver); + + math::TensorArchiver::Read(_data, "data", archiver); + ElementType scale = 1; + archiver["scale"] >> scale; + _scale.Resize(NumOutputChannels()); + _scale.Fill(scale); + + _layerParameters.input = _data; + } + + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/Layer.h b/libraries/predictors/neural/include/Layer.h index acaa8d6aa..f6e051e40 100644 --- a/libraries/predictors/neural/include/Layer.h +++ b/libraries/predictors/neural/include/Layer.h @@ -329,4 +329,242 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/Layer.tcc" +#pragma region implementation + +#include <algorithm> // std::copy +#include <cstdio> // snprintf +#include <cstdlib> // std::rand +#include <limits> // std::numeric_limits +#include <type_traits> // std::is_signed + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template <typename ValueType> + ValueType GetPaddingValue(PaddingScheme paddingScheme) + { + switch (paddingScheme) + { + case PaddingScheme::zeros: + return static_cast<ValueType>(0); + case PaddingScheme::minusOnes: + return static_cast<ValueType>(-1); + case PaddingScheme::alternatingZeroAndOnes: + return static_cast<ValueType>(0); + case PaddingScheme::randomZeroAndOnes: + return static_cast<ValueType>(0); + case PaddingScheme::min: + return -std::numeric_limits<ValueType>::max(); + case PaddingScheme::max: + return std::numeric_limits<ValueType>::max(); + } + throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Invalid PaddingScheme"); + } + + // + // Layer + // + template <typename ElementType> + Layer<ElementType>::Layer(const LayerParameters& layerParameters) : + _layerParameters(layerParameters), + _output(layerParameters.outputShape) + { + InitializeOutputValues(_output, layerParameters.outputPaddingParameters); + } + + template <typename ElementType> + Layer<ElementType>::Layer(const Layer& other) : + _layerParameters(other._layerParameters), + _output(other._layerParameters.outputShape) + { + InitializeOutputValues(_output, other._layerParameters.outputPaddingParameters); + } + + template <typename ElementType> + typename Layer<ElementType>::Shape Layer<ElementType>::GetInputShapeMinusPadding() const + { + auto&& inputShape = _layerParameters.input.GetShape(); + auto paddingSize = _layerParameters.inputPaddingParameters.paddingSize; + if (inputShape.NumRows() < 2 * paddingSize || inputShape.NumColumns() < 2 * paddingSize) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Input size not large enough to accommodate padding"); + } + return { inputShape.NumRows() - 2 * paddingSize, inputShape.NumColumns() - 2 * paddingSize, inputShape.NumChannels() }; + } + + template <typename ElementType> + typename Layer<ElementType>::Shape Layer<ElementType>::GetOutputShapeMinusPadding() const + { + auto&& outputShape = _layerParameters.outputShape; + auto paddingSize = _layerParameters.outputPaddingParameters.paddingSize; + if (outputShape.NumRows() < 2 * paddingSize || outputShape.NumColumns() < 2 * paddingSize) + { + throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Output size not large enough to accommodate padding"); + } + return { outputShape.NumRows() - 2 * paddingSize, outputShape.NumColumns() - 2 * paddingSize, outputShape.NumChannels() }; + } + + template <typename ElementType> + void Layer<ElementType>::InitializeOutputValues(TensorType& output, PaddingParameters outputPaddingParameters) + { + switch (outputPaddingParameters.paddingScheme) + { + case PaddingScheme::minusOnes: + output.Fill(-1); + break; + case PaddingScheme::randomZeroAndOnes: + output.Generate([] { return static_cast<ElementType>(std::rand() % 2); }); + break; + case PaddingScheme::alternatingZeroAndOnes: + { + for (size_t row = 
0; row < output.NumRows(); row++) + { + for (size_t column = 0; column < output.NumColumns(); column++) + { + ElementType value = static_cast((row % 2) ^ (column % 2)); + for (size_t channel = 0; channel < output.NumChannels(); channel++) + { + output(row, column, channel) = value; + } + } + } + } + break; + case PaddingScheme::min: + if (std::is_signed::value) + { + output.Fill(-std::numeric_limits::max()); + } + else + { + output.Fill(std::numeric_limits::min()); + } + break; + case PaddingScheme::max: + output.Fill(std::numeric_limits::max()); + break; + default: + output.Fill(0); + break; + } + } + + template + void Layer::Print(std::ostream& os, size_t numValuesToPrint) const + { + static constexpr size_t bufferLength = 1024; + char buffer[bufferLength] = { 0 }; + std::string layerName = LayerNames[static_cast(GetLayerType())]; + + snprintf(buffer, bufferLength, "======== %s layer (%zd x %zd x %zd) pad: %zd -> (%zd x %zd x %zd) pad: %zd ========", layerName.c_str(), _layerParameters.input.NumRows() - 2 * _layerParameters.inputPaddingParameters.paddingSize, _layerParameters.input.NumColumns() - 2 * _layerParameters.inputPaddingParameters.paddingSize, _layerParameters.input.NumChannels(), _layerParameters.inputPaddingParameters.paddingSize, _layerParameters.outputShape.NumRows() - 2 * _layerParameters.outputPaddingParameters.paddingSize, _layerParameters.outputShape.NumColumns() - 2 * _layerParameters.outputPaddingParameters.paddingSize, _layerParameters.outputShape.NumChannels(), _layerParameters.outputPaddingParameters.paddingSize); + + os << buffer; + + const ConstTensorReferenceType output(_output); + for (size_t i = 0; (i < numValuesToPrint) && (i < output.Size()); i++) + { + size_t channel = i % output.NumChannels(); + size_t col = (i / output.NumChannels()) % output.NumColumns(); + size_t row = i / (output.NumChannels() * output.NumColumns()); + + if (i % 10 == 0) os << std::endl; + + if (channel < output.NumChannels() && + (col + _layerParameters.outputPaddingParameters.paddingSize) < output.NumColumns() && + (row + _layerParameters.outputPaddingParameters.paddingSize) < output.NumRows()) + { + const ElementType val = output({ row + _layerParameters.outputPaddingParameters.paddingSize, col + _layerParameters.outputPaddingParameters.paddingSize, channel }); + snprintf(buffer, bufferLength, "%+9.5f ", val); + os << buffer; + } + } + os << std::endl + << "======== End of " << layerName << " ========" << std::endl; + } + + template + void Layer::WriteToArchive(utilities::Archiver& archiver) const + { + archiver["inputPaddingScheme"] << static_cast(_layerParameters.inputPaddingParameters.paddingScheme); + archiver["inputPaddingSize"] << _layerParameters.inputPaddingParameters.paddingSize; + + std::vector outputShape = _layerParameters.outputShape; + archiver["outputShape"] << outputShape; + + archiver["outputPaddingScheme"] << static_cast(_layerParameters.outputPaddingParameters.paddingScheme); + archiver["outputPaddingSize"] << _layerParameters.outputPaddingParameters.paddingSize; + } + + template + void Layer::ReadFromArchive(utilities::Unarchiver& archiver) + { + int inputPaddingScheme; + archiver["inputPaddingScheme"] >> inputPaddingScheme; + _layerParameters.inputPaddingParameters.paddingScheme = static_cast(inputPaddingScheme); + archiver["inputPaddingSize"] >> _layerParameters.inputPaddingParameters.paddingSize; + + std::vector outputShape; + archiver["outputShape"] >> outputShape; + math::IntegerTriplet shape; + std::copy(outputShape.begin(), outputShape.end(), 
shape.begin()); + _layerParameters.outputShape = Shape(shape); + + int outputPaddingScheme; + archiver["outputPaddingScheme"] >> outputPaddingScheme; + _layerParameters.outputPaddingParameters.paddingScheme = static_cast(outputPaddingScheme); + archiver["outputPaddingSize"] >> _layerParameters.outputPaddingParameters.paddingSize; + + _output = TensorType(_layerParameters.outputShape); + + LayerSerializationContext* layerContext = dynamic_cast*>(&archiver.GetContext()); + if (layerContext != nullptr) + { + // Set the input reference to the previously restored layer's output. This is saved in the + // serialization context + _layerParameters.input = layerContext->GetPreviousOutputReference(); + + // Save the output reference to the serialization context + layerContext->SetOutputReference(GetOutput()); + } + + // Set the initial padding + InitializeOutputValues(_output, _layerParameters.outputPaddingParameters); + } + + template + typename Layer::ConstTensorReferenceType Layer::GetInputMinusPadding() + { + auto padding = _layerParameters.inputPaddingParameters.paddingSize; + return _layerParameters.input.GetSubTensor({ padding, padding, 0 }, GetInputShapeMinusPadding()); + } + + template + typename Layer::TensorReferenceType Layer::GetOutputMinusPadding() + { + auto padding = _layerParameters.outputPaddingParameters.paddingSize; + return _output.GetSubTensor({ padding, padding, 0 }, + { _output.NumRows() - 2 * padding, _output.NumColumns() - 2 * padding, _output.NumChannels() }); + } + + template + void Layer::AssignValues(ConstTensorReferenceType& input, TensorReferenceType& output) + { + DEBUG_THROW(input.NumRows() > output.NumRows() || input.NumColumns() > output.NumColumns() || input.NumChannels() > output.NumChannels(), utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Input tensor must not exceed output tensor dimensions.")); + + for (size_t i = 0; i < input.NumRows(); i++) + { + for (size_t j = 0; j < input.NumColumns(); j++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + output(i, j, k) = input(i, j, k); + } + } + } + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/LeakyReLUActivation.h b/libraries/predictors/neural/include/LeakyReLUActivation.h index 396a8fd9e..7e028730b 100644 --- a/libraries/predictors/neural/include/LeakyReLUActivation.h +++ b/libraries/predictors/neural/include/LeakyReLUActivation.h @@ -77,4 +77,39 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/LeakyReLUActivation.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + ElementType LeakyReLUActivation::Apply(const ElementType input) const + { + return ((input > 0) ? 
input : _leakyFactor * input); + } + + template <typename ElementType> + void LeakyReLUActivation<ElementType>::WriteToArchive(utilities::Archiver& archiver) const + { + archiver["leakyFactor"] << _leakyFactor; + } + + template <typename ElementType> + void LeakyReLUActivation<ElementType>::ReadFromArchive(utilities::Unarchiver& archiver) + { + archiver["leakyFactor"] >> _leakyFactor; + } + + template <typename ElementType> + std::unique_ptr<ActivationImpl<ElementType>> LeakyReLUActivation<ElementType>::Copy() const + { + return std::make_unique<LeakyReLUActivation<ElementType>>(_leakyFactor); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/MaxPoolingFunction.h b/libraries/predictors/neural/include/MaxPoolingFunction.h index eacb33c0e..0d24fcdf6 100644 --- a/libraries/predictors/neural/include/MaxPoolingFunction.h +++ b/libraries/predictors/neural/include/MaxPoolingFunction.h @@ -49,4 +49,36 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/MaxPoolingFunction.tcc" \ No newline at end of file +#pragma region implementation + +#include <algorithm> // std::max +#include <limits> // std::numeric_limits + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template <typename ElementType> + MaxPoolingFunction<ElementType>::MaxPoolingFunction() : + _max(std::numeric_limits<ElementType>::lowest()) + { + } + + template <typename ElementType> + void MaxPoolingFunction<ElementType>::Accumulate(ElementType input) + { + _max = std::max(_max, input); + } + + template <typename ElementType> + ElementType MaxPoolingFunction<ElementType>::GetValue() const + { + return _max; + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/MeanPoolingFunction.h b/libraries/predictors/neural/include/MeanPoolingFunction.h index a9af292b6..0d4724a2c 100644 --- a/libraries/predictors/neural/include/MeanPoolingFunction.h +++ b/libraries/predictors/neural/include/MeanPoolingFunction.h @@ -52,4 +52,37 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/MeanPoolingFunction.tcc" \ No newline at end of file +#pragma region implementation + +#include <cstddef> // size_t (assumed include) + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template <typename ElementType> + MeanPoolingFunction<ElementType>::MeanPoolingFunction() : + _sum(0), + _numValues(0) + { + } + + template <typename ElementType> + void MeanPoolingFunction<ElementType>::Accumulate(ElementType input) + { + _sum += input; + _numValues++; + } + + template <typename ElementType> + ElementType MeanPoolingFunction<ElementType>::GetValue() const + { + return (_sum / (ElementType)_numValues); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/ParametricReLUActivation.h b/libraries/predictors/neural/include/ParametricReLUActivation.h index 7a4fa5059..4fb4d6b56 100644 --- a/libraries/predictors/neural/include/ParametricReLUActivation.h +++ b/libraries/predictors/neural/include/ParametricReLUActivation.h @@ -87,4 +87,57 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/ParametricReLUActivation.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template <typename ElementType> + ParametricReLUActivation<ElementType>::ParametricReLUActivation(TensorType alpha) : + _alpha(std::move(alpha)) + { + if (_alpha.GetDataPointer() == nullptr) + { + throw utilities::InputException(utilities::InputExceptionErrors::nullReference, "alpha tensor has null data field"); + } + } + + template <typename ElementType> + ElementType ParametricReLUActivation<ElementType>::Apply(const ElementType input) const + { + UNUSED(input); + // We want people to call the ApplyIndex method in this case. 
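+ // (With a per-element alpha tensor, a single scalar result is not well defined.)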
+ throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented); + } + + template + ElementType ParametricReLUActivation::ApplyIndex(const ElementType input, const math::IntegerTriplet& index) const + { + return ((input > 0) ? input : _alpha(index) * input); + } + + template + void ParametricReLUActivation::WriteToArchive(utilities::Archiver& archiver) const + { + math::TensorArchiver::Write(_alpha, "alpha", archiver); + } + + template + void ParametricReLUActivation::ReadFromArchive(utilities::Unarchiver& archiver) + { + math::TensorArchiver::Read(_alpha, "alpha", archiver); + } + + template + std::unique_ptr> ParametricReLUActivation::Copy() const + { + return std::make_unique>(_alpha); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/PoolingLayer.h b/libraries/predictors/neural/include/PoolingLayer.h index 34ab3780b..aa7037814 100644 --- a/libraries/predictors/neural/include/PoolingLayer.h +++ b/libraries/predictors/neural/include/PoolingLayer.h @@ -94,4 +94,117 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/PoolingLayer.tcc" \ No newline at end of file +#pragma region implementation + +#include + +namespace ell +{ +namespace predictors +{ + namespace neural + { + + template class PoolingFunctionType> + PoolingLayer::PoolingLayer(const LayerParameters& layerParameters, PoolingParameters poolingParameters) : + Layer(layerParameters), + _poolingParameters(poolingParameters) + { + } + + template class PoolingFunctionType> + void PoolingLayer::Compute() + { + auto input = GetInput(); + auto output = GetOutputMinusPadding(); + const size_t poolingWindowSize = _poolingParameters.poolingSize; + + for (size_t row = 0; row < output.NumRows(); row++) + { + const size_t startRow = row * _poolingParameters.stride; + for (size_t column = 0; column < output.NumColumns(); column++) + { + const size_t startColumn = column * _poolingParameters.stride; + std::vector> poolingValues(output.NumChannels()); + + for (size_t pool_y = 0; pool_y < poolingWindowSize; pool_y++) + { + for (size_t pool_x = 0; pool_x < poolingWindowSize; pool_x++) + { + for (size_t channel = 0; channel < output.NumChannels(); channel++) + { + // Account for when part of the pooling window falls beyond the pooling region. 
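+ // In-bounds window positions accumulate real input samples below; out-of-bounds positions + // accumulate the pooling function's designated padding value instead.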
+ size_t inputRow = startRow + pool_y; + size_t inputColumn = startColumn + pool_x; + + if ((inputRow < input.NumRows()) && (inputColumn < input.NumColumns())) + { + poolingValues[channel].Accumulate(input(inputRow, inputColumn, channel)); + } + else + { + poolingValues[channel].Accumulate(poolingValues[channel].GetValueAtPadding()); + } + } + } + } + + for (size_t channel = 0; channel < output.NumChannels(); channel++) + { + output(row, column, channel) = poolingValues[channel].GetValue(); + } + } + } + } + + template class PoolingFunctionType> + bool PoolingLayer::UsesPadding() const + { + const size_t inputDataPaddingSize = GetLayerParameters().inputPaddingParameters.paddingSize; + const auto inputShape = GetInputShapeMinusPadding(); + const auto outputShape = GetOutputShapeMinusPadding(); + const auto inputWidth = inputShape.NumRows(); + const auto outputWidth = outputShape.NumRows(); + const auto stride = _poolingParameters.stride; + const auto poolingSize = _poolingParameters.poolingSize; + + const auto paddedOutputWidth = (inputWidth - 1) / stride + 1; // ceil(inputWidth/stride); + const auto nonPaddedOutputWidth = (inputWidth - poolingSize) / stride + 1; // ceil((inputWidth-windowWidth+1) / stride) + + if (outputWidth == nonPaddedOutputWidth) + { + return false; + } + else if (outputWidth == paddedOutputWidth) + { + return true; + } + else + { + return inputDataPaddingSize != 0; + } + } + + template class PoolingFunctionType> + void PoolingLayer::WriteToArchive(utilities::Archiver& archiver) const + { + Layer::WriteToArchive(archiver); + + archiver["poolingSize"] << _poolingParameters.poolingSize; + archiver["stride"] << _poolingParameters.stride; + } + + template class PoolingFunctionType> + void PoolingLayer::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer::ReadFromArchive(archiver); + + archiver["poolingSize"] >> _poolingParameters.poolingSize; + archiver["stride"] >> _poolingParameters.stride; + } + + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/ReLUActivation.h b/libraries/predictors/neural/include/ReLUActivation.h index 29096cd17..15e39b7d7 100644 --- a/libraries/predictors/neural/include/ReLUActivation.h +++ b/libraries/predictors/neural/include/ReLUActivation.h @@ -49,4 +49,27 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/ReLUActivation.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + ElementType ReLUActivation::Apply(const ElementType input) const + { + return ((input > 0) ? 
input : 0); + } + + template + std::unique_ptr> ReLUActivation::Copy() const + { + return std::make_unique>(); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/RegionDetectionLayer.h b/libraries/predictors/neural/include/RegionDetectionLayer.h index 37f70ccb2..4d4ac7348 100644 --- a/libraries/predictors/neural/include/RegionDetectionLayer.h +++ b/libraries/predictors/neural/include/RegionDetectionLayer.h @@ -90,4 +90,146 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/RegionDetectionLayer.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + RegionDetectionLayer::RegionDetectionLayer(const LayerParameters& layerParameters, RegionDetectionParameters regionDetectionParams) : + Base(layerParameters), + _regionDetectionParams(std::move(regionDetectionParams)) + { + if (_regionDetectionParams.numAnchors <= 0) + { + throw std::invalid_argument("regionDetectionParams.numAnchors <= 0"); + } + + if (_regionDetectionParams.width <= 0) + { + throw std::invalid_argument("regionDetectionParams.width <= 0"); + } + + if (_regionDetectionParams.height <= 0) + { + throw std::invalid_argument("regionDetectionParams.height <= 0"); + } + + if (_regionDetectionParams.numBoxesPerCell <= 0) + { + throw std::invalid_argument("regionDetectionParams.numBoxesPerCell <= 0"); + } + + if (_regionDetectionParams.numClasses <= 0) + { + throw std::invalid_argument("regionDetectionParams.numClasses <= 0"); + } + + if (this->_layerParameters.input.NumRows() != (size_t)_regionDetectionParams.width) + { + throw std::invalid_argument("input number of rows doesn't match width in detection parameters"); + } + + if (this->_layerParameters.input.NumColumns() != (size_t)_regionDetectionParams.height) + { + throw std::invalid_argument("input number of columns doesn't match height in detection parameters"); + } + + if (this->_layerParameters.input.NumChannels() != (size_t)(( + (_regionDetectionParams.numAnchors + 1 + _regionDetectionParams.numClasses) * + _regionDetectionParams.numBoxesPerCell))) + { + throw std::invalid_argument("input number of channels doesn't match box size * number of boxes in detection parameters"); + } + } + + template + void RegionDetectionLayer::Compute() + { + auto output = this->GetOutputMinusPadding(); + auto& input = this->_layerParameters.input; + auto numAnchors = _regionDetectionParams.numAnchors; + + assert(output.GetShape() == input.GetShape()); + + // The input has the shape of width x height x ((5 + classes) * numBoxes) + // Each "cell" in the third dimension has the format + // [tx, ty, tw, th, tc, class probabilities...] for each "box". + // The first four (tx, ty, tw, th) are coordinates that define + // the bounding box for the region where the network thinks an + // object might be. tc is the confidence on the presence of an + // object at all, and should be considered the scale of the + // class probabilities. 
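+ // For example, with numAnchors == 4, numClasses == 20, and numBoxesPerCell == 5 + // (illustrative values only), each cell spans 5 * (4 + 1 + 20) == 125 input channels.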
+ + SigmoidActivation sigmoid; + SoftMaxActivation softmax; + + for (int i = 0; i < _regionDetectionParams.width; ++i) + { + for (int j = 0; j < _regionDetectionParams.height; ++j) + { + auto outputChannelVector = output.template GetSlice(i, j); + auto inputChannelVector = input.template GetSlice(i, j); + + for (int k = 0; k < _regionDetectionParams.numBoxesPerCell; ++k) + { + auto boxOffset = k * (numAnchors + 1 + _regionDetectionParams.numClasses); + + // Get the vector for the anchors for both output and input + auto outputAnchors = outputChannelVector.GetSubVector(boxOffset, numAnchors); + auto inputAnchors = inputChannelVector.GetSubVector(boxOffset, numAnchors); + + // Copy input over to output + outputAnchors.CopyFrom(inputAnchors); + + // Apply sigmoid to the confidence value, which is immediately after the anchor points + outputChannelVector[boxOffset + numAnchors] = sigmoid(inputChannelVector[boxOffset + numAnchors]); + + // Get the vector for the class probabilities for both output and input + auto outputClassProbabilities = outputChannelVector.GetSubVector(boxOffset + numAnchors + 1, _regionDetectionParams.numClasses); + auto inputClassProbabilities = inputChannelVector.GetSubVector(boxOffset + numAnchors + 1, _regionDetectionParams.numClasses); + + // Copy input over to output + outputClassProbabilities.CopyFrom(inputClassProbabilities); + + if (_regionDetectionParams.applySoftmax) + { + // Apply softmax to probabilities + softmax(outputClassProbabilities); + } + } + } + } + } + + template + void RegionDetectionLayer::WriteToArchive(utilities::Archiver& archiver) const + { + Layer::WriteToArchive(archiver); + + archiver["width"] << _regionDetectionParams.width; + archiver["height"] << _regionDetectionParams.height; + archiver["numBoxesPerCell"] << _regionDetectionParams.numBoxesPerCell; + archiver["numClasses"] << _regionDetectionParams.numClasses; + archiver["numCoordinates"] << _regionDetectionParams.numAnchors; + } + + template + void RegionDetectionLayer::ReadFromArchive(utilities::Unarchiver& unarchiver) + { + Layer::ReadFromArchive(unarchiver); + + unarchiver["width"] >> _regionDetectionParams.width; + unarchiver["height"] >> _regionDetectionParams.height; + unarchiver["numBoxesPerCell"] >> _regionDetectionParams.numBoxesPerCell; + unarchiver["numClasses"] >> _regionDetectionParams.numClasses; + unarchiver["numCoordinates"] >> _regionDetectionParams.numAnchors; + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/ScalingLayer.h b/libraries/predictors/neural/include/ScalingLayer.h index 85c56d8c1..8a8569a97 100644 --- a/libraries/predictors/neural/include/ScalingLayer.h +++ b/libraries/predictors/neural/include/ScalingLayer.h @@ -72,4 +72,48 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/ScalingLayer.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + ScalingLayer::ScalingLayer(const LayerParameters& layerParameters, const VectorType& scales) : + Layer(layerParameters), + _scales(scales) + { + } + + template + void ScalingLayer::Compute() + { + auto output = GetOutputMinusPadding(); + auto& input = _layerParameters.input; + + AssignValues(input, output); + math::ScaleUpdate(_scales, output); + } + + template + void ScalingLayer::WriteToArchive(utilities::Archiver& archiver) const + { + Layer::WriteToArchive(archiver); + + 
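// The scale vector round-trips through math::VectorArchiver, mirroring ReadFromArchive below. + 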
math::VectorArchiver::Write(_scales, "scales", archiver); + } + + template + void ScalingLayer::ReadFromArchive(utilities::Unarchiver& archiver) + { + Layer::ReadFromArchive(archiver); + + math::VectorArchiver::Read(_scales, "scales", archiver); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/SigmoidActivation.h b/libraries/predictors/neural/include/SigmoidActivation.h index 7721a524e..f70067862 100644 --- a/libraries/predictors/neural/include/SigmoidActivation.h +++ b/libraries/predictors/neural/include/SigmoidActivation.h @@ -49,4 +49,40 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/SigmoidActivation.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + ElementType SigmoidActivation::Apply(const ElementType input) const + { + ElementType output; + if (input >= 0.0) + { + double exp_value = std::exp(-input); + output = static_cast(1.0 / (1.0 + exp_value)); + } + else + { + double exp_value = std::exp(input); + output = static_cast(exp_value / (1.0 + exp_value)); + } + return output; + } + + template + std::unique_ptr> SigmoidActivation::Copy() const + { + return std::make_unique>(); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/SoftMaxActivation.h b/libraries/predictors/neural/include/SoftMaxActivation.h index 92257aadd..7476c93c2 100644 --- a/libraries/predictors/neural/include/SoftMaxActivation.h +++ b/libraries/predictors/neural/include/SoftMaxActivation.h @@ -52,4 +52,50 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/SoftMaxActivation.tcc" +#pragma region implementation + +#include +#include + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + void SoftMaxActivation::Apply(math::ColumnVectorReference& input) const + { + ElementType maxVal = std::numeric_limits::lowest(); + for (size_t i = 0; i < input.Size(); ++i) + { + maxVal = std::max(maxVal, input[i]); + } + + ElementType sum = 0; + for (size_t i = 0; i < input.Size(); ++i) + { + const auto eulerVal = static_cast(std::exp(input[i] - maxVal)); + input[i] = eulerVal; + sum += eulerVal; + } + + const ElementType epsilon = static_cast(1e-7); + if (sum < epsilon) + { + sum = 1.0; + } + + input.Transform([sum](ElementType value) { return value / sum; }); + } + + template + void SoftMaxActivation::operator()(math::ColumnVectorReference& input) const + { + return Apply(input); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/SoftmaxLayer.h b/libraries/predictors/neural/include/SoftmaxLayer.h index 569b0b8d0..4fabe5653 100644 --- a/libraries/predictors/neural/include/SoftmaxLayer.h +++ b/libraries/predictors/neural/include/SoftmaxLayer.h @@ -60,4 +60,81 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/SoftmaxLayer.tcc" \ No newline at end of file +#pragma region implementation + +#include + +namespace ell +{ +namespace predictors +{ + namespace neural + { + + template + SoftmaxLayer::SoftmaxLayer(const LayerParameters& layerParameters) : + Layer(layerParameters) + { + if (_layerParameters.input.Size() != GetOutputMinusPadding().Size()) + { + throw 
utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, GetRuntimeTypeName() + ": Expected size of input and output tensor (minus padding) to match"); + } + } + + template + void SoftmaxLayer::Compute() + { + auto output = GetOutputMinusPadding(); + auto& input = _layerParameters.input; + + AssignValues(input, output); + + ElementType sum = 0; + ElementType maxValue = std::numeric_limits::lowest(); + + // Find the max + for (size_t i = 0; i < input.NumRows(); i++) + { + for (size_t j = 0; j < input.NumColumns(); j++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + ElementType value = input(i, j, k); + maxValue = std::max(maxValue, value); + } + } + } + + // Use the max to calculate the Euler value + for (size_t i = 0; i < input.NumRows(); i++) + { + for (size_t j = 0; j < input.NumColumns(); j++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + ElementType value = input(i, j, k); + ElementType eulerVal = std::exp(value - maxValue); + output(i, j, k) = eulerVal; + sum += eulerVal; + } + } + } + + // Divide the value by the sum. After this, the sum of all values will be 1.0 + for (size_t i = 0; i < input.NumRows(); i++) + { + for (size_t j = 0; j < input.NumColumns(); j++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + output(i, j, k) /= sum; + } + } + } + } + + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/include/TanhActivation.h b/libraries/predictors/neural/include/TanhActivation.h index 4402459fd..1c73b8a92 100644 --- a/libraries/predictors/neural/include/TanhActivation.h +++ b/libraries/predictors/neural/include/TanhActivation.h @@ -48,4 +48,27 @@ namespace predictors } // namespace predictors } // namespace ell -#include "../tcc/TanhActivation.tcc" +#pragma region implementation + +namespace ell +{ +namespace predictors +{ + namespace neural + { + template + ElementType TanhActivation::Apply(const ElementType input) const + { + return std::tanh(input); + } + + template + std::unique_ptr> TanhActivation::Copy() const + { + return std::make_unique>(); + } + } // namespace neural +} // namespace predictors +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/predictors/neural/tcc/Activation.tcc b/libraries/predictors/neural/tcc/Activation.tcc deleted file mode 100644 index f153cae81..000000000 --- a/libraries/predictors/neural/tcc/Activation.tcc +++ /dev/null @@ -1,94 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Activation.tcc (neural) -// Authors: Chris Lovett -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ElementType ActivationImpl::operator()(const ElementType input) const - { - return Apply(input); - } - - template - ElementType ActivationImpl::ApplyIndex(const ElementType input, const math::IntegerTriplet& /*index*/) const - { - return Apply(input); - } - - template - Activation::Activation(std::unique_ptr>& impl) : - _impl(std::move(impl)) - {} - - template - Activation::Activation(ActivationImpl* impl) : - _impl(impl) - { - } - - template - Activation::Activation(const Activation& other) : - _impl(std::move(other._impl->Copy())) - {} - - template - Activation& Activation::operator=(const Activation& other) - { - if (this 
!= &other) - { - auto temp = other._impl->Copy(); - _impl.swap(temp); - } - return *this; - } - - template - ElementType Activation::Apply(const ElementType input) const - { - return _impl->Apply(input); - } - - template - ElementType Activation::operator()(const ElementType input) const - { - return _impl->Apply(input); - } - - template - ElementType Activation::ApplyIndex(const ElementType input, const math::IntegerTriplet& index) const - { - return _impl->ApplyIndex(input, index); - } - - template - void Activation::Apply(math::ColumnVector& input) const - { - input.Transform([this](ElementType value) { return _impl->Apply(value); }); - } - - template - void Activation::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["activation"] << _impl; - } - - template - void Activation::ReadFromArchive(utilities::Unarchiver& archiver) - { - archiver["activation"] >> _impl; - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/ActivationLayer.tcc b/libraries/predictors/neural/tcc/ActivationLayer.tcc deleted file mode 100644 index fa4a254ec..000000000 --- a/libraries/predictors/neural/tcc/ActivationLayer.tcc +++ /dev/null @@ -1,83 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ActivationLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ActivationLayer::ActivationLayer(const LayerParameters& layerParameters, const ActivationType& activation) : - Layer(layerParameters), - _activation(activation) - { - ValidateDimensions(); - } - - template - ActivationLayer::ActivationLayer(const ActivationLayer& other) : - Layer(other), - _activation(other._activation) - { - } - - template - void ActivationLayer::ValidateDimensions() - { - auto output = GetOutputMinusPadding(); - auto& input = _layerParameters.input; - if (input.NumRows() > output.NumRows() || input.NumColumns() > output.NumColumns() || input.NumChannels() > output.NumChannels()) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Input tensor must not exceed output tensor (minus padding) dimensions for activation layer."); - } - } - - template - void ActivationLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto input = _layerParameters.input; - - for (size_t i = 0; i < input.NumRows(); i++) - { - for (size_t j = 0; j < input.NumColumns(); j++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - ElementType value = input(i, j, k); - output(i, j, k) = _activation.ApplyIndex(value, math::IntegerTriplet{ i, j, k }); - } - } - } - } - - template - void ActivationLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - _activation.WriteToArchive(archiver); - } - - template - void ActivationLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - if (archiver.HasNextPropertyName("activation")) - { - _activation.ReadFromArchive(archiver); - } - if (!_activation.GetImpl()) - { - _activation.LegacyReadFromArchive(archiver); - } - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/BatchNormalizationLayer.tcc 
b/libraries/predictors/neural/tcc/BatchNormalizationLayer.tcc deleted file mode 100644 index c5d619a16..000000000 --- a/libraries/predictors/neural/tcc/BatchNormalizationLayer.tcc +++ /dev/null @@ -1,91 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: BatchNormalizationLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - BatchNormalizationLayer::BatchNormalizationLayer(const LayerParameters& layerParameters, const VectorType& mean, const VectorType& variance, ElementType epsilon, EpsilonSummand epsilonSummand) : - Layer(layerParameters), - _multiplicationValues(mean.Size()), - _additionValues(variance.Size()), - _epsilon(epsilon), - _epsilonSummand(epsilonSummand) - { - if (mean.Size() != variance.Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, GetRuntimeTypeName() + ": Size of 'mean' and 'variance' must match"); - } - if (_layerParameters.input.Size() != GetOutputMinusPadding().Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, GetRuntimeTypeName() + ": Expected size of input and output tensor (minus padding) to match"); - } - if (mean.Size() != NumOutputChannels()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, GetRuntimeTypeName() + ": Number of 'mean' and 'variance' values must equal number of channels in output"); - } - - // Batch norm is: outputValue = (inputValue - mean) / (sqrt(variance) + _epsilon) - // To turn this into one MultiplyAdd operation, we can rearrange it to: - // EpsilonSummand::Variance: - // outputValue = inputValue * (1/(sqrt(variance + _epsilon))) + (-mean * 1/(sqrt(variance + _epsilon))) - // EpsilonSummand::SqrtVariance: - // outputValue = inputValue * (1/(sqrt(variance) + _epsilon)) + (-mean * 1/(sqrt(variance) + _epsilon)) - for (size_t i = 0; i < _additionValues.Size(); i++) - { - ElementType varianceFactor = (_epsilonSummand == EpsilonSummand::Variance) ? 
(1 / (std::sqrt(variance[i] + _epsilon))) : (1 / (std::sqrt(variance[i]) + _epsilon)); - - _multiplicationValues[i] = varianceFactor; - _additionValues[i] = -mean[i] * varianceFactor; - } - } - - template - void BatchNormalizationLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto input = _layerParameters.input; - - AssignValues(input, output); - math::ScaleAddUpdate(_multiplicationValues, _additionValues, output); - } - - template - void BatchNormalizationLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - - math::VectorArchiver::Write(_multiplicationValues, "multiplicationValues", archiver); - math::VectorArchiver::Write(_additionValues, "additionValues", archiver); - - archiver["epsilon"] << _epsilon; - archiver["epsilonSummand"] << static_cast(_epsilonSummand); - } - - template - void BatchNormalizationLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - math::VectorArchiver::Read(_multiplicationValues, "multiplicationValues", archiver); - math::VectorArchiver::Read(_additionValues, "additionValues", archiver); - - archiver["epsilon"] >> _epsilon; - - int value; - archiver["epsilonSummand"] >> value; - _epsilonSummand = static_cast(value); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/BiasLayer.tcc b/libraries/predictors/neural/tcc/BiasLayer.tcc deleted file mode 100644 index 7a5f99b6d..000000000 --- a/libraries/predictors/neural/tcc/BiasLayer.tcc +++ /dev/null @@ -1,59 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: BiasLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - - template - BiasLayer::BiasLayer(const LayerParameters& layerParameters, const VectorType& bias) : - Layer(layerParameters), - _bias(bias) - { - if (this->GetInputShape() != this->GetOutputShapeMinusPadding()) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, GetRuntimeTypeName() + ": Expected size of input and output tensor (minus padding) to match"); - } - if (_bias.Size() != NumOutputChannels()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, GetRuntimeTypeName() + ": Number of 'bias' values must equal number of channels in output"); - } - } - - template - void BiasLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto input = _layerParameters.input; - - AssignValues(input, output); - math::AddUpdate(_bias, output); - } - - template - void BiasLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - - math::VectorArchiver::Write(_bias, "bias", archiver); - } - - template - void BiasLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - math::VectorArchiver::Read(_bias, "bias", archiver); - } - - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/BinaryConvolutionalLayer.tcc b/libraries/predictors/neural/tcc/BinaryConvolutionalLayer.tcc deleted file mode 100644 index 6ac37efbb..000000000 --- a/libraries/predictors/neural/tcc/BinaryConvolutionalLayer.tcc +++ /dev/null @@ -1,497 +0,0 @@ 
-//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: BinaryConvolutionalLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// TODO: let's make a popcount function that does the right thing -#if defined(_MSC_VER) -#include -#define POPCOUNT64 __popcnt64 -#else -#define POPCOUNT64 __builtin_popcountl -#endif - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - BinaryConvolutionalLayer::BinaryConvolutionalLayer(const LayerParameters& layerParameters, const BinaryConvolutionalParameters& convolutionalParameters, const ConstTensorReferenceType& weights) : - Layer(layerParameters), - _convolutionalParameters(convolutionalParameters), - _realValuedShapedInputMatrix(0, 0), - _realValuedWeightsMatrix(0, 0), - _realValuedOutputMatrix(0, 0) - { - if (weights.GetConstDataPointer() == nullptr) - { - throw utilities::InputException(utilities::InputExceptionErrors::nullReference, "weights tensor has null data field"); - } - - if (weights.Size() != (NumOutputChannels() * _layerParameters.input.NumChannels() * convolutionalParameters.receptiveField * convolutionalParameters.receptiveField)) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "weights dimensions for a convolutional layer should be the size of the receptive field volume * number of filters"); - } - - ComputeWeightsMatrices(weights); - InitializeIOMatrices(); - ComputeShapedInputPaddingMask(); - } - - template - void BinaryConvolutionalLayer::ComputeWeightsMatrices(const ConstTensorReferenceType& weights) - { - const auto filterWidth = _convolutionalParameters.receptiveField; - - _binarizedWeights.resize(NumOutputChannels()); - _filterMeans.resize(NumOutputChannels()); - _realValuedWeightsMatrix = MatrixType(NumOutputChannels(), filterWidth * filterWidth * _layerParameters.input.NumChannels()); - - const size_t binarizedFilterVolumeSize = ((filterWidth * filterWidth * _layerParameters.input.NumChannels()) + (_binaryElementSize - 1)) / _binaryElementSize; - - // Binarize the weights and calculate the mean per filter - auto flattened = weights.ReferenceAsMatrix(); - for (size_t startRow = 0; startRow < flattened.NumRows() / filterWidth; ++startRow) - { - // Iterate over the weights corresponding to the filter and calculate the mean - ElementType sum = 0; - std::vector filterWeights(filterWidth * filterWidth * _layerParameters.input.NumChannels()); - for (size_t row = 0; row < filterWidth; row++) - { - auto weightsVector = flattened.GetMajorVector(startRow * filterWidth + row); - - for (size_t i = 0; i < weightsVector.Size(); ++i) - { - const size_t columnOffset = row * weightsVector.Size(); - ElementType value = weightsVector[i]; - - sum += std::abs(value); - filterWeights[columnOffset + i] = value; - } - } - - ElementType mean = sum / static_cast(filterWeights.size()); - _filterMeans[startRow] = mean; - - // initialize the mean according to the binary weights scale - ElementType scale(1.0); - if (_convolutionalParameters.weightsScale == BinaryWeightsScale::mean) - { - scale = mean; - } - - for (size_t i = 0; i < filterWeights.size(); ++i) - { - // Set the weights matrix based on the weights value and mean - _realValuedWeightsMatrix(startRow, i) = (filterWeights[i] > 0) ? 
scale : -scale; - } - - // Binarize and pack the weights - _binarizedWeights[startRow].resize(binarizedFilterVolumeSize, 0); - for (size_t i = 0; i < filterWeights.size(); ++i) - { - size_t block = i / _binaryElementSize; - int bit = i % _binaryElementSize; - if (filterWeights[i] > 0) - { - _binarizedWeights[startRow][block] |= ((uint64_t)1 << bit); - } - } - } - } - - template - void BinaryConvolutionalLayer::InitializeIOMatrices() - { - const auto filterWidth = _convolutionalParameters.receptiveField; - const auto outputShape = NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding(); - - _realValuedShapedInputMatrix = { filterWidth * filterWidth * _layerParameters.input.NumChannels(), outputShape }; - _realValuedOutputMatrix = { NumOutputChannels(), outputShape }; - - _binarizedShapedInput.resize(outputShape); - _shapedInputPaddingMask.resize(outputShape); - _shapedInputPaddingMaskSums.resize(outputShape); - // Set the sizes of the shapedInput and padding mask vectors - const size_t binarizedFilterVolumeSize = ((filterWidth * filterWidth * _layerParameters.input.NumChannels()) - 1) / _binaryElementSize + 1; - for (size_t i = 0; i < _binarizedShapedInput.size(); ++i) - { - _binarizedShapedInput[i].resize(binarizedFilterVolumeSize, 0); - _shapedInputPaddingMask[i].resize(binarizedFilterVolumeSize, 0); - } - } - - template - void BinaryConvolutionalLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto input = _layerParameters.input; - - if (_convolutionalParameters.method == BinaryConvolutionMethod::gemm) - { - // Re-shape input. - ReceptiveFieldToColumns(input, _realValuedShapedInputMatrix); - - // Multiply reshaped input and weights. - math::MultiplyScaleAddUpdate(static_cast(1.0), _realValuedWeightsMatrix, _realValuedShapedInputMatrix, static_cast(0.0), _realValuedOutputMatrix); - - // Re-shape the output into the output tensor - for (size_t i = 0; i < output.NumRows(); ++i) - { - for (size_t j = 0; j < output.NumColumns(); ++j) - { - for (size_t k = 0; k < output.NumChannels(); ++k) - { - size_t row = k; - size_t column = (i * output.NumColumns()) + j; - output(i, j, k) = _realValuedOutputMatrix(row, column); - } - } - } - } - else - { - // Use the bitwise method - // Binarize and pack the input - ReceptiveFieldToBinaryRows(input, _binarizedShapedInput); - - // XOR and sum - const size_t filterSize = _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * input.NumChannels(); - const size_t binarizedFilterSize = _binarizedWeights[0].size(); - const size_t filterDrop = filterSize % _binaryElementSize; - const size_t filterAdjust = _binaryElementSize - filterDrop; - - // Iterate over filters - for (size_t i = 0; i < output.NumRows(); ++i) - { - size_t shapedInputOffset = i * NumOutputColumnsMinusPadding(); - for (size_t j = 0; j < output.NumColumns(); ++j) - { - for (size_t k = 0; k < output.NumChannels(); ++k) - { - ElementType sum = 0; - - auto& binarizedWeights = _binarizedWeights[k]; - auto& binarizedShapedInput = _binarizedShapedInput[shapedInputOffset + j]; - auto& shapedInputPaddingMask = _shapedInputPaddingMask[shapedInputOffset + j]; - - for (size_t blockIndex = 0; blockIndex < binarizedFilterSize; blockIndex++) - { - const uint64_t fValue = binarizedWeights[blockIndex]; - const uint64_t iValue = binarizedShapedInput[blockIndex]; - - if (HasInputZeroPadding()) - { - // Zeros are neither -1 nor 1, mask out the effects - // of zero padding from the XOR product - // This logic is only applied to zero padding where the effect - // of 
inserting zeros is well-known, other padding - // schemes that can generate zero values are not special-cased. - const uint64_t maskValue = shapedInputPaddingMask[blockIndex]; - const uint64_t xorProduct = maskValue & (fValue ^ iValue); - - // Apply the actual zero padding, which is to "add back" the number of values - // that were assumed to be -1 - sum += (2.0f * POPCOUNT64(xorProduct) - _binaryElementSize + POPCOUNT64(~maskValue)); - } - else - { - const uint64_t xorProduct = fValue ^ iValue; - sum += (2.0f * POPCOUNT64(xorProduct) - _binaryElementSize); - } - } - - ElementType scale(1.0); - if (_convolutionalParameters.weightsScale == BinaryWeightsScale::mean) - { - scale = _filterMeans[k]; - } - - if (filterDrop == 0) - { - output(i, j, k) = (-scale * sum); - } - else - { - output(i, j, k) = (-scale * (sum + filterAdjust)); - } - } - } - } - } - } - - // Fills a vector of vectors where each row is the values of the receptive field from the input stretched into a vector, - // and the number of vectors is equal to the number of locations that a receptive field is slid over the input volume. - template - void BinaryConvolutionalLayer::ReceptiveFieldToBinaryRows(ConstTensorReferenceType input, std::vector>& shapedInput) - { - const size_t fieldVolumeSize = _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(); - const size_t outputHeight = NumOutputRowsMinusPadding(); - const size_t outputWidth = NumOutputColumnsMinusPadding(); - const size_t rowMax = outputWidth * outputHeight; - - for (size_t outRow = 0; outRow < rowMax; ++outRow) - { - const size_t convolutionalRow = outRow / outputWidth; - const size_t convolutionalCol = outRow % outputWidth; - const size_t horizontalStart = (convolutionalCol * _convolutionalParameters.stride); - const size_t verticalStart = (convolutionalRow * _convolutionalParameters.stride); - - for (size_t f = 0; f < fieldVolumeSize; ++f) - { - // Calculate the col, row, depth values in the convolutional field volume - const size_t volDepth = f % input.NumChannels(); - const size_t volCol = (f / input.NumChannels()) % _convolutionalParameters.receptiveField; - const size_t volRow = (f / input.NumChannels()) / _convolutionalParameters.receptiveField; - - // Calculate where this fits in relation to the input volume - const intptr_t sourceCol = horizontalStart + volCol; - const intptr_t sourceRow = verticalStart + volRow; - const intptr_t sourceDepth = volDepth; - - ElementType value = input(sourceRow, sourceCol, sourceDepth); - const size_t block = (f / _binaryElementSize); - const size_t bit = f % _binaryElementSize; - - if (bit == 0) - { - // Initialize to zero - shapedInput[outRow][block] = static_cast(0); - } - - // Set the bit value - if (value > 0) - { - shapedInput[outRow][block] += ((uint64_t)1 << bit); - } - } - } - } - - template - void BinaryConvolutionalLayer::ReceptiveFieldToColumns(ConstTensorReferenceType input, MatrixType& shapedInput) - { - const size_t fieldVolumeSize = _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(); - const size_t convolutionalHeight = NumOutputRowsMinusPadding(); - const size_t convolutionalWidth = NumOutputColumnsMinusPadding(); - - for (size_t f = 0; f < fieldVolumeSize; ++f) - { - const size_t fieldDepth = f % _layerParameters.input.NumChannels(); - const size_t fieldColumn = (f / _layerParameters.input.NumChannels()) % _convolutionalParameters.receptiveField; - const size_t fieldRow = (f 
/ _layerParameters.input.NumChannels()) / _convolutionalParameters.receptiveField; - - size_t rowOffset = 0; - for (size_t h = 0; h < convolutionalHeight; ++h) - { - size_t colOffset = 0; - for (size_t w = 0; w < convolutionalWidth; ++w) - { - size_t inputRow = rowOffset + fieldRow; - size_t inputCol = colOffset + fieldColumn; - - ElementType value = input(inputRow, inputCol, fieldDepth); - - // Don't binarize zero-padded input when weights are not scaled - if (IsInputZeroPadding(inputRow, inputCol)) - { - shapedInput(f, h * convolutionalWidth + w) = value; - } - else - { - shapedInput(f, h * convolutionalWidth + w) = (value > 0) ? 1.0f : -1.0f; - } - - colOffset += _convolutionalParameters.stride; - } - rowOffset += _convolutionalParameters.stride; - } - } - } - - template - bool BinaryConvolutionalLayer::HasInputZeroPadding() const - { - return HasPadding(_layerParameters.inputPaddingParameters, PaddingScheme::zeros); - } - - template - bool BinaryConvolutionalLayer::IsInputZeroPadding(size_t row, size_t column) const - { - if (HasInputZeroPadding()) - { - const size_t paddingSize = _layerParameters.inputPaddingParameters.paddingSize; - const size_t rowPaddingRightIndex = _layerParameters.input.NumRows() - paddingSize; - const size_t columnPaddingRightIndex = _layerParameters.input.NumColumns() - paddingSize; - - return row < paddingSize || row >= rowPaddingRightIndex || - column < paddingSize || column >= columnPaddingRightIndex; - } - - return false; - } - - template - void BinaryConvolutionalLayer::WriteToArchive(utilities::Archiver& archiver) const - { - const size_t filterWidth = _convolutionalParameters.receptiveField; - const size_t binarizedFilterVolumeSize = ((filterWidth * filterWidth * _layerParameters.input.NumChannels()) + (_binaryElementSize - 1)) / _binaryElementSize; - - Layer::WriteToArchive(archiver); - - archiver["receptiveField"] << _convolutionalParameters.receptiveField; - archiver["stride"] << _convolutionalParameters.stride; - archiver["method"] << static_cast(_convolutionalParameters.method); - archiver["weightsScale"] << static_cast(_convolutionalParameters.weightsScale); - - // Compute binarized weights - size_t numRows = _realValuedWeightsMatrix.NumRows(); - size_t numCols = _realValuedWeightsMatrix.NumColumns(); - std::vector> binarizedWeights(numRows); - for (size_t rowIndex = 0; rowIndex < numRows; ++rowIndex) - { - binarizedWeights[rowIndex].resize(binarizedFilterVolumeSize, 0); - for (size_t colIndex = 0; colIndex < numCols; ++colIndex) - { - size_t block = colIndex / _binaryElementSize; - int bit = colIndex % _binaryElementSize; - if (_realValuedWeightsMatrix(rowIndex, colIndex) > 0) - { - binarizedWeights[rowIndex][block] |= ((uint64_t)1 << bit); - } - } - } - - std::vector temp; - archiver["binarizedWeights_numVectors"] << binarizedWeights.size(); - for (size_t i = 0; i < binarizedWeights.size(); ++i) - { - temp.insert(temp.end(), binarizedWeights[i].begin(), binarizedWeights[i].end()); - } - archiver["binarizedWeights_values"] << temp; - temp.clear(); - archiver["filterMeans"] << _filterMeans; - } - - template - void BinaryConvolutionalLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - archiver["receptiveField"] >> _convolutionalParameters.receptiveField; - archiver["stride"] >> _convolutionalParameters.stride; - int method; - archiver["method"] >> method; - _convolutionalParameters.method = static_cast(method); - int weightsScale; - archiver["weightsScale"] >> weightsScale; - 
_convolutionalParameters.weightsScale = static_cast(weightsScale); - - size_t numVectors = 0; - std::vector temp; - const size_t binarizedFilterVolumeSize = ((_convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels()) + (_binaryElementSize - 1)) / _binaryElementSize; - archiver["binarizedWeights_numVectors"] >> numVectors; - archiver["binarizedWeights_values"] >> temp; - _binarizedWeights.resize(numVectors); - for (size_t i = 0; i < _binarizedWeights.size(); ++i) - { - _binarizedWeights[i].resize(binarizedFilterVolumeSize, 0); - for (size_t j = 0; j < binarizedFilterVolumeSize; ++j) - { - _binarizedWeights[i][j] = temp[i * binarizedFilterVolumeSize + j]; - } - } - archiver["filterMeans"] >> _filterMeans; - - ComputeRealValuedWeightsMatrix(); - InitializeIOMatrices(); - ComputeShapedInputPaddingMask(); - } - - template - void BinaryConvolutionalLayer::ComputeRealValuedWeightsMatrix() - { - const auto filterWidth = _convolutionalParameters.receptiveField; - const auto numWeightsColumns = filterWidth * filterWidth * _layerParameters.input.NumChannels(); - const size_t binarizedFilterVolumeSize = (numWeightsColumns - 1) / _binaryElementSize + 1; - - _realValuedWeightsMatrix = { NumOutputChannels(), numWeightsColumns }; - for (size_t rowIndex = 0; rowIndex < _binarizedWeights.size(); ++rowIndex) - { - size_t colIndex = 0; - assert(binarizedFilterVolumeSize == _binarizedWeights[rowIndex].size()); - for (size_t blockIndex = 0; blockIndex < binarizedFilterVolumeSize; blockIndex++) - { - const auto bits = _binarizedWeights[rowIndex][blockIndex]; - const auto filterMean = _filterMeans[rowIndex]; - - ElementType scale(1.0); - if (_convolutionalParameters.weightsScale == BinaryWeightsScale::mean) - { - scale = filterMean; - } - - for (size_t bitIndex = 0; bitIndex < _binaryElementSize && colIndex < numWeightsColumns; ++bitIndex, ++colIndex) - { - const auto bitVal = (bits >> bitIndex) & 0x01; - _realValuedWeightsMatrix(rowIndex, colIndex) = bitVal == 0 ? 
-scale : scale; - } - } - } - } - - template - void BinaryConvolutionalLayer::ComputeShapedInputPaddingMask() - { - const size_t fieldVolumeSize = _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(); - const size_t outputHeight = NumOutputRowsMinusPadding(); - const size_t outputWidth = NumOutputColumnsMinusPadding(); - const size_t rowMax = outputWidth * outputHeight; - - for (size_t outRow = 0; outRow < rowMax; ++outRow) - { - const size_t convolutionalRow = outRow / outputWidth; - const size_t convolutionalCol = outRow % outputWidth; - const size_t horizontalStart = (convolutionalCol * _convolutionalParameters.stride); - const size_t verticalStart = (convolutionalRow * _convolutionalParameters.stride); - int maskSum = 0; - - for (size_t f = 0; f < fieldVolumeSize; ++f) - { - // Calculate the col, row, and depth values in the convolutional field volume - const size_t volCol = (f / _layerParameters.input.NumChannels()) % _convolutionalParameters.receptiveField; - const size_t volRow = (f / _layerParameters.input.NumChannels()) / _convolutionalParameters.receptiveField; - - // Calculate where this fits in relation to the input volume - const intptr_t sourceCol = horizontalStart + volCol; - const intptr_t sourceRow = verticalStart + volRow; - - const size_t block = f / _binaryElementSize; - const size_t bit = f % _binaryElementSize; - - if (bit == 0) - { - // Initialize to ones - _shapedInputPaddingMask[outRow][block] = std::numeric_limits::max(); - } - - // Set the mask for zero padding, so that the effect of these - // on the bitwise operation is removed - if (IsInputZeroPadding(sourceRow, sourceCol)) - { - _shapedInputPaddingMask[outRow][block] -= ((uint64_t)1 << bit); - maskSum += 1; - } - } - _shapedInputPaddingMaskSums[outRow] = maskSum; - } - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/ConvolutionalLayer.tcc b/libraries/predictors/neural/tcc/ConvolutionalLayer.tcc deleted file mode 100644 index e02d46c62..000000000 --- a/libraries/predictors/neural/tcc/ConvolutionalLayer.tcc +++ /dev/null @@ -1,283 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ConvolutionalLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// #include -#include -#include -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ConvolutionalLayer::ConvolutionalLayer(const LayerParameters& layerParameters, const ConvolutionalParameters& convolutionalParameters, TensorType weights) : - Layer(layerParameters), - _convolutionalParameters(convolutionalParameters), - _weights(std::move(weights)), - _shapedInput{ _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(), NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding() }, - _weightsMatrix(_layerParameters.outputShape.NumChannels(), _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels()), - _outputMatrix{ NumOutputChannels(), NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding() }, - _originalConvolutionMethod(convolutionalParameters.method) - { - if (_weights.GetDataPointer() == nullptr) - { - throw 
utilities::InputException(utilities::InputExceptionErrors::nullReference, "weights tensor has null data field"); - } - - _isDepthwiseSeparable = (_weights.NumChannels() == 1) && (_layerParameters.input.NumChannels() > 1); - if (_isDepthwiseSeparable && (_output.NumChannels() != _layerParameters.input.NumChannels())) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Input and output channel sizes must match for a depthwise-separable convolutional layer"); - } - else if (!_isDepthwiseSeparable && (_weights.Size() != (_output.NumChannels() * _layerParameters.input.NumChannels() * convolutionalParameters.receptiveField * convolutionalParameters.receptiveField))) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "weights dimensions for a convolutional layer should be the size of the receptive field volume * number of filters"); - } - - CalculateConvolutionMethod(); - ComputeWeightsMatrix(); - } - - template - void ConvolutionalLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto& input = _layerParameters.input; - auto stride = static_cast(_convolutionalParameters.stride); - - if (!_isDepthwiseSeparable) - { - switch (_convolutionalParameters.method) - { - case ConvolutionMethod::simple: - { - const int numFilters = static_cast(output.NumChannels()); - dsp::Convolve2DSimple(input, _weights, numFilters, stride, output); - } - break; - case ConvolutionMethod::unrolled: - { - const int numFilters = static_cast(output.NumChannels()); - auto result = dsp::Convolve2DUnrolled(input, _weights, numFilters, stride); - output.CopyFrom(result); - } - break; - case ConvolutionMethod::winograd: - { - assert(stride == 1); - const int numFilters = static_cast(output.NumChannels()); - auto result = dsp::Convolve2DWinograd(input, _weights, numFilters); - output.CopyFrom(result); - } - break; - case ConvolutionMethod::diagonal: - { - // Use the Diagonal method - - // Flatten the input - auto inputMatrix = input.ReferenceAsMatrix(); - - const size_t depth = input.NumChannels(); - const size_t kt = _convolutionalParameters.receptiveField * depth; - const size_t paddingSize = _layerParameters.inputPaddingParameters.paddingSize; - const size_t numConvolutions = (inputMatrix.NumColumns() - kt) / depth + 1; - const size_t numFiltersAtAtime = _convolutionalParameters.numFiltersAtATime; - const size_t numFilters = _layerParameters.outputShape.NumChannels(); - auto weightsMatrix = _weights.ReferenceAsMatrix().Transpose(); - - for (size_t j = 0; j < numConvolutions; j++) - { - // Get the sub matrix for Vj - auto Vj = inputMatrix.GetSubMatrix(0, j * depth, inputMatrix.NumRows(), kt); - - for (size_t filterStart = 0; filterStart < numFilters; filterStart += numFiltersAtAtime) - { - size_t numFiltersToUse = std::min(numFiltersAtAtime, numFilters - filterStart); - - auto Wl = weightsMatrix.GetSubMatrix(0, filterStart * _convolutionalParameters.receptiveField, weightsMatrix.NumRows(), numFiltersToUse * _convolutionalParameters.receptiveField); - - MatrixType A(Vj.NumRows(), _convolutionalParameters.receptiveField * numFiltersToUse); - - math::MultiplyScaleAddUpdate(static_cast(1.0), Vj, Wl, static_cast(0.0), A); - - for (size_t l = 0; l < numFiltersToUse; l++) - { - for (size_t row = 0; row < (A.NumRows() - 2 * paddingSize); row++) - { - ElementType sum = 0.0; - for (size_t diagonal = 0; diagonal < _convolutionalParameters.receptiveField; diagonal++) - { - sum += A(row + diagonal, l * _convolutionalParameters.receptiveField + diagonal); - } - 
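For orientation, the assignment that follows (output(row, j, filterStart + l) = sum) is the defining step of the diagonal method: each output value is the sum of receptiveField entries of the GEMM result A taken along a diagonal. A toy sketch of that reduction alone, using plain arrays and made-up sizes rather than the ELL matrix types:

    // Sketch only: sum A along a diagonal, one output element per row offset.
    #include <iostream>
    #include <vector>

    int main()
    {
        const size_t receptiveField = 3;
        const size_t numRows = 5;
        // A is row-major with receptiveField columns (one filter's slice).
        std::vector<float> A(numRows * receptiveField);
        for (size_t i = 0; i < A.size(); ++i) A[i] = static_cast<float>(i);

        for (size_t row = 0; row + receptiveField <= numRows; ++row)
        {
            float sum = 0;
            for (size_t d = 0; d < receptiveField; ++d)
            {
                sum += A[(row + d) * receptiveField + d]; // walk the diagonal
            }
            std::cout << "output[" << row << "] = " << sum << "\n";
        }
    }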
output(row, j, filterStart + l) = sum; - } - } - } - } - } - break; - - default: - throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Convolution method not supported"); - } - } - else // if _isDepthwiseSeparable - { - const int numFilters = 1; - const size_t numInputRows = input.NumRows(); - const size_t numInputColumns = input.NumColumns(); - const size_t numOutputRows = output.NumRows(); - const size_t numOutputColumns = output.NumColumns(); - const size_t filterRows = _convolutionalParameters.receptiveField; - - for (size_t channel = 0; channel < output.NumChannels(); ++channel) - { - using TensorType = typename Layer::TensorType; - using TensorReferenceType = typename Layer::TensorReferenceType; - - TensorType weights(_weights.GetSubTensor(filterRows * channel, 0, 0, filterRows, filterRows, 1)); - const auto& inputChannelTensor = input.GetSubTensor(0, 0, channel, numInputRows, numInputColumns, 1); - TensorReferenceType outputChannelTensor = output.GetSubTensor(0, 0, channel, numOutputRows, numOutputColumns, 1); - - switch (_convolutionalParameters.method) - { - case ConvolutionMethod::simple: - { - auto result = dsp::Convolve2DSimpleDepthwiseSeparable(inputChannelTensor, weights, numFilters, stride); - outputChannelTensor.CopyFrom(result); - } - break; - case ConvolutionMethod::unrolled: - { - auto result = dsp::Convolve2DUnrolled(inputChannelTensor, weights, numFilters, stride); - outputChannelTensor.CopyFrom(result); - } - break; - case ConvolutionMethod::winograd: - { - auto result = dsp::Convolve2DWinogradDepthwiseSeparable(inputChannelTensor, weights, numFilters, stride); - outputChannelTensor.CopyFrom(result); - } - break; - default: - throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented, "Convolution method not supported for depthwise separable convolution"); - } - } - } - } - - template - void ConvolutionalLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - - archiver["receptiveField"] << _convolutionalParameters.receptiveField; - archiver["stride"] << _convolutionalParameters.stride; - archiver["method"] << static_cast(_originalConvolutionMethod); - archiver["numFiltersAtATime"] << static_cast(_convolutionalParameters.numFiltersAtATime); - - math::TensorArchiver::Write(_weights, "weights", archiver); - } - - template - void ConvolutionalLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - archiver["receptiveField"] >> _convolutionalParameters.receptiveField; - archiver["stride"] >> _convolutionalParameters.stride; - int method; - archiver["method"] >> method; - _originalConvolutionMethod = static_cast(method); - int numFilters; - archiver["numFiltersAtATime"] >> numFilters; - _convolutionalParameters.numFiltersAtATime = static_cast(numFilters); - - math::TensorArchiver::Read(_weights, "weights", archiver); - _isDepthwiseSeparable = (_weights.NumChannels() == 1) && (_layerParameters.input.NumChannels() > 1); - CalculateConvolutionMethod(); - ComputeWeightsMatrix(); - InitializeIOMatrices(); - } - - template - void ConvolutionalLayer::ComputeWeightsMatrix() - { - if (_convolutionalParameters.method == ConvolutionMethod::unrolled) - { - _weightsMatrix = { _layerParameters.outputShape.NumChannels(), _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels() }; - // Use the unrolled method - // Reshape the weights - auto flattened = _weights.ReferenceAsMatrix(); - for 
(size_t startRow = 0; startRow < flattened.NumRows() / _convolutionalParameters.receptiveField; startRow++) - { - for (size_t row = 0; row < _convolutionalParameters.receptiveField; row++) - { - auto weightsVector = flattened.GetMajorVector(startRow * _convolutionalParameters.receptiveField + row); - for (size_t i = 0; i < weightsVector.Size(); i++) - { - const size_t columnOffset = row * weightsVector.Size(); - _weightsMatrix(startRow, columnOffset + i) = weightsVector[i]; - } - } - } - } - } - - template - void ConvolutionalLayer::InitializeIOMatrices() - { - _shapedInput = { _convolutionalParameters.receptiveField * _convolutionalParameters.receptiveField * _layerParameters.input.NumChannels(), NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding() }; - _outputMatrix = { NumOutputChannels(), NumOutputRowsMinusPadding() * NumOutputColumnsMinusPadding() }; - } - - template - void ConvolutionalLayer::CalculateConvolutionMethod() - { - _convolutionalParameters.method = _originalConvolutionMethod; - switch (_convolutionalParameters.method) - { - case ConvolutionMethod::automatic: - _convolutionalParameters.method = _isDepthwiseSeparable ? ConvolutionMethod::simple : ConvolutionMethod::unrolled; - break; - case ConvolutionMethod::simple: - case ConvolutionMethod::unrolled: // fallthrough - // do nothing - break; - case ConvolutionMethod::diagonal: - // Verify that we meet the criteria for doing Diagonal method. If not, - // choose the normal method. - if ((_convolutionalParameters.receptiveField % 2 == 0) || _convolutionalParameters.stride != 1) - { - _convolutionalParameters.method = _isDepthwiseSeparable ? ConvolutionMethod::simple : ConvolutionMethod::unrolled; - } - break; - case ConvolutionMethod::winograd: - // Verify that we meet the criteria for doing Winograd method. If not, - // choose the normal method. - if (_convolutionalParameters.stride != 1 || _convolutionalParameters.receptiveField != 3) - { - _convolutionalParameters.method = _isDepthwiseSeparable ? ConvolutionMethod::simple : ConvolutionMethod::unrolled; - } - break; - } - if (_isDepthwiseSeparable) - { - // Verify we can use a workable method for depthwise separable convolutions. 
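The fallback rules in CalculateConvolutionMethod above, together with the depthwise-separable check that follows, amount to a small pure function. A sketch of the policy with illustrative names, not the ELL API:

    // Sketch only: the method-selection fallback policy described above.
    #include <cstddef>

    enum class ConvolutionMethod { automatic, simple, unrolled, diagonal, winograd };

    ConvolutionMethod ChooseMethod(ConvolutionMethod requested, size_t receptiveField,
                                   size_t stride, bool depthwiseSeparable)
    {
        const ConvolutionMethod fallback = depthwiseSeparable ? ConvolutionMethod::simple
                                                              : ConvolutionMethod::unrolled;
        ConvolutionMethod method = requested;
        switch (requested)
        {
        case ConvolutionMethod::automatic:
            method = fallback;
            break;
        case ConvolutionMethod::diagonal:
            // Diagonal needs an odd receptive field and unit stride.
            if (receptiveField % 2 == 0 || stride != 1) method = fallback;
            break;
        case ConvolutionMethod::winograd:
            // Winograd is only taken for 3x3 filters with unit stride.
            if (stride != 1 || receptiveField != 3) method = fallback;
            break;
        default: // simple and unrolled are always workable
            break;
        }
        // Depthwise-separable layers support only simple, unrolled, or winograd;
        // by this point only diagonal can still violate that.
        if (depthwiseSeparable && method == ConvolutionMethod::diagonal)
        {
            method = ConvolutionMethod::simple;
        }
        return method;
    }

In short: Winograd is only taken for 3x3 filters at stride 1, diagonal additionally requires an odd receptive field, and depthwise-separable layers fall back to simple for anything outside simple, unrolled, or winograd.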
- if ((_convolutionalParameters.method != ConvolutionMethod::unrolled) && (_convolutionalParameters.method != ConvolutionMethod::simple) && (_convolutionalParameters.method != ConvolutionMethod::winograd)) - { - _convolutionalParameters.method = ConvolutionMethod::simple; - } - } - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/FullyConnectedLayer.tcc b/libraries/predictors/neural/tcc/FullyConnectedLayer.tcc deleted file mode 100644 index 4eea953e5..000000000 --- a/libraries/predictors/neural/tcc/FullyConnectedLayer.tcc +++ /dev/null @@ -1,113 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: FullyConnectedLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - - template - FullyConnectedLayer::FullyConnectedLayer(const LayerParameters& layerParameters, ConstMatrixReferenceType& weights) : - Layer(layerParameters), - _weights(weights.NumRows(), weights.NumColumns()), - _shapedInput(layerParameters.input.Size()), - _outputVector(GetOutputMinusPadding().Size()) - { - _weights = weights; - if (_weights.NumRows() != GetOutputMinusPadding().Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - ell::utilities::FormatString("FullyConnectedLayer weights has %d row, but expecting %d based on output size minus padding", - _weights.NumRows(), - GetOutputMinusPadding().Size())); - } - } - - template - FullyConnectedLayer::FullyConnectedLayer(const LayerParameters& layerParameters, ConstTensorReferenceType& weights) : - Layer(layerParameters), - _weights(GetOutputMinusPadding().Size(), layerParameters.input.Size(), weights.ToArray()), - _shapedInput(layerParameters.input.Size()), - _outputVector(GetOutputMinusPadding().Size()) - { - if (weights.Size() != GetOutputMinusPadding().Size() * layerParameters.input.Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, - ell::utilities::FormatString("FullyConnectedLayer weights size %d does not match output size (minus padding) of %d times input size %d", - _weights.NumRows(), - GetOutputMinusPadding().Size(), - layerParameters.input.Size())); - } - } - - template - void FullyConnectedLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto& input = _layerParameters.input; - - // Reshape the input into a vector - size_t columnIndex = 0; - for (size_t i = 0; i < input.NumRows(); i++) - { - for (size_t j = 0; j < input.NumColumns(); j++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - _shapedInput[columnIndex++] = input(i, j, k); - } - } - } - - math::MultiplyScaleAddUpdate((ElementType)1.0f, _weights, _shapedInput, (ElementType)0.0f, _outputVector); - - // Reshape the output - columnIndex = 0; - for (size_t i = 0; i < output.NumRows(); i++) - { - for (size_t j = 0; j < output.NumColumns(); j++) - { - for (size_t k = 0; k < output.NumChannels(); k++) - { - output(i, j, k) = _outputVector[columnIndex++]; - } - } - } - } - - template - const typename FullyConnectedLayer::MatrixType& FullyConnectedLayer::GetWeights() const - { - return _weights; - } - - template - void FullyConnectedLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - - 
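FullyConnectedLayer::Compute above is a flatten / GEMV / reshape pipeline. A self-contained sketch of the same data flow, with plain vectors standing in for ELL's tensor and math types (shapes are made up):

    // Sketch only: flatten (rows x cols x channels) in row/column/channel
    // order, multiply by a row-major weights matrix, print the result.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        const size_t rows = 2, cols = 2, channels = 3; // input shape
        const size_t inSize = rows * cols * channels;  // 12
        const size_t outSize = 4;                      // output neurons

        std::vector<float> input(inSize, 1.0f);
        std::vector<float> weights(outSize * inSize, 0.5f); // row-major

        // Flatten with channel varying fastest, matching the loop nest in
        // Compute(); this index order is what defines the flattened layout.
        std::vector<float> shaped(inSize);
        size_t index = 0;
        for (size_t i = 0; i < rows; ++i)
            for (size_t j = 0; j < cols; ++j)
                for (size_t k = 0; k < channels; ++k)
                    shaped[index++] = input[(i * cols + j) * channels + k];

        // y = W * x (the deleted code calls math::MultiplyScaleAddUpdate).
        std::vector<float> output(outSize, 0.0f);
        for (size_t r = 0; r < outSize; ++r)
            for (size_t c = 0; c < inSize; ++c)
                output[r] += weights[r * inSize + c] * shaped[c];

        for (float v : output) std::cout << v << " "; // 6 6 6 6
        std::cout << "\n";
    }

The flattening order must match the column order used when the weights matrix was built, which is why the same row/column/channel nest appears on both the input and output sides of Compute.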
math::MatrixArchiver::Write(_weights, "weights", archiver); - } - - template - void FullyConnectedLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - math::MatrixArchiver::Read(_weights, "weights", archiver); - _shapedInput.Resize(_layerParameters.input.Size()); - _outputVector.Resize(GetOutputMinusPadding().Size()); - } - - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/HardSigmoidActivation.tcc b/libraries/predictors/neural/tcc/HardSigmoidActivation.tcc deleted file mode 100644 index 014a66f4a..000000000 --- a/libraries/predictors/neural/tcc/HardSigmoidActivation.tcc +++ /dev/null @@ -1,29 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ActivationFunction.tcc (neural) -// Authors: James Devine -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ElementType HardSigmoidActivation::Apply(const ElementType input) const - { - ElementType output = (static_cast(0.2) * input) + static_cast(0.5); - return output < static_cast(0) ? static_cast(0) : (output > static_cast(1) ? static_cast(1) : output); - } - - template - std::unique_ptr> HardSigmoidActivation::Copy() const - { - return std::make_unique>(); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/InputLayer.tcc b/libraries/predictors/neural/tcc/InputLayer.tcc deleted file mode 100644 index ed9597685..000000000 --- a/libraries/predictors/neural/tcc/InputLayer.tcc +++ /dev/null @@ -1,100 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: InputLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - - template - InputLayer::InputLayer(const InputParameters& inputParameters) : - Layer(LayerParameters{ TensorType(1, 1, 1), inputParameters.inputPaddingParameters, inputParameters.outputShape, inputParameters.outputPaddingParameters }), - _scale(NumOutputChannels()), - _data(inputParameters.inputShape) - { - _layerParameters.input = _data; - _scale.Fill(inputParameters.scale); - } - - template - void InputLayer::SetInput(const DataVectorType& input) - { - size_t index = 0; - auto& inputTensor = _data; - - for (size_t i = 0; i < inputTensor.NumRows(); ++i) - { - for (size_t j = 0; j < inputTensor.NumColumns(); ++j) - { - for (size_t k = 0; k < inputTensor.NumChannels(); ++k) - { - inputTensor(i, j, k) = static_cast(input[index++]); - } - } - } - } - - template - void InputLayer::SetInput(const std::vector& input) - { - size_t index = 0; - auto& inputTensor = _data; - - for (size_t i = 0; i < inputTensor.NumRows(); ++i) - { - for (size_t j = 0; j < inputTensor.NumColumns(); ++j) - { - for (size_t k = 0; k < inputTensor.NumChannels(); ++k) - { - inputTensor(i, j, k) = static_cast(input[index++]); - } - } - } - } - - template - void InputLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto& input = _layerParameters.input; - - AssignValues(input, output); - math::ScaleUpdate(_scale, output); - } - - template - void 
InputLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - - math::TensorArchiver::Write(_data, "data", archiver); - if (_scale.Size() > 0) - archiver["scale"] << _scale[0]; - else - archiver["scale"] << 1; - } - - template - void InputLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - math::TensorArchiver::Read(_data, "data", archiver); - ElementType scale = 1; - archiver["scale"] >> scale; - _scale.Resize(NumOutputChannels()); - _scale.Fill(scale); - - _layerParameters.input = _data; - } - - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/Layer.tcc b/libraries/predictors/neural/tcc/Layer.tcc deleted file mode 100644 index 1848e4bad..000000000 --- a/libraries/predictors/neural/tcc/Layer.tcc +++ /dev/null @@ -1,243 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// - // -// Project: Embedded Learning Library (ELL) -// File: Layer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ValueType GetPaddingValue(PaddingScheme paddingScheme) - { - switch (paddingScheme) - { - case PaddingScheme::zeros: - return static_cast(0); - case PaddingScheme::minusOnes: - return static_cast(-1); - case PaddingScheme::alternatingZeroAndOnes: - return static_cast(0); - case PaddingScheme::randomZeroAndOnes: - return static_cast(0); - case PaddingScheme::min: - return -std::numeric_limits::max(); - case PaddingScheme::max: - return std::numeric_limits::max(); - } - throw utilities::InputException(utilities::InputExceptionErrors::invalidArgument, "Invalid PaddingScheme"); - } - - // - // Layer - // - template - Layer::Layer(const LayerParameters& layerParameters) : - _layerParameters(layerParameters), - _output(layerParameters.outputShape) - { - InitializeOutputValues(_output, layerParameters.outputPaddingParameters); - } - - template - Layer::Layer(const Layer& other) : - _layerParameters(other._layerParameters), - _output(other._layerParameters.outputShape) - { - InitializeOutputValues(_output, other._layerParameters.outputPaddingParameters); - } - - template - typename Layer::Shape Layer::GetInputShapeMinusPadding() const - { - auto&& inputShape = _layerParameters.input.GetShape(); - auto paddingSize = _layerParameters.inputPaddingParameters.paddingSize; - if (inputShape.NumRows() < 2 * paddingSize || inputShape.NumColumns() < 2 * paddingSize) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Input size not large enough to accommodate padding"); - } - return { inputShape.NumRows() - 2 * paddingSize, inputShape.NumColumns() - 2 * paddingSize, inputShape.NumChannels() }; - } - - template - typename Layer::Shape Layer::GetOutputShapeMinusPadding() const - { - auto&& outputShape = _layerParameters.outputShape; - auto paddingSize = _layerParameters.outputPaddingParameters.paddingSize; - if (outputShape.NumRows() < 2 * paddingSize || outputShape.NumColumns() < 2 * paddingSize) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Output size not large enough to accommodate padding"); - } - return { outputShape.NumRows() - 2 * paddingSize, outputShape.NumColumns() - 2 * paddingSize, outputShape.NumChannels() }; - } - - template - 
void Layer::InitializeOutputValues(TensorType& output, PaddingParameters outputPaddingParameters) - { - switch (outputPaddingParameters.paddingScheme) - { - case PaddingScheme::minusOnes: - output.Fill(-1); - break; - case PaddingScheme::randomZeroAndOnes: - output.Generate([] { return static_cast(std::rand() % 2); }); - break; - case PaddingScheme::alternatingZeroAndOnes: - { - for (size_t row = 0; row < output.NumRows(); row++) - { - for (size_t column = 0; column < output.NumColumns(); column++) - { - ElementType value = static_cast((row % 2) ^ (column % 2)); - for (size_t channel = 0; channel < output.NumChannels(); channel++) - { - output(row, column, channel) = value; - } - } - } - } - break; - case PaddingScheme::min: - if (std::is_signed::value) - { - output.Fill(-std::numeric_limits::max()); - } - else - { - output.Fill(std::numeric_limits::min()); - } - break; - case PaddingScheme::max: - output.Fill(std::numeric_limits::max()); - break; - default: - output.Fill(0); - break; - } - } - - template - void Layer::Print(std::ostream& os, size_t numValuesToPrint) const - { - static constexpr size_t bufferLength = 1024; - char buffer[bufferLength] = { 0 }; - std::string layerName = LayerNames[static_cast(GetLayerType())]; - - snprintf(buffer, bufferLength, "======== %s layer (%zd x %zd x %zd) pad: %zd -> (%zd x %zd x %zd) pad: %zd ========", layerName.c_str(), _layerParameters.input.NumRows() - 2 * _layerParameters.inputPaddingParameters.paddingSize, _layerParameters.input.NumColumns() - 2 * _layerParameters.inputPaddingParameters.paddingSize, _layerParameters.input.NumChannels(), _layerParameters.inputPaddingParameters.paddingSize, _layerParameters.outputShape.NumRows() - 2 * _layerParameters.outputPaddingParameters.paddingSize, _layerParameters.outputShape.NumColumns() - 2 * _layerParameters.outputPaddingParameters.paddingSize, _layerParameters.outputShape.NumChannels(), _layerParameters.outputPaddingParameters.paddingSize); - - os << buffer; - - const ConstTensorReferenceType output(_output); - for (size_t i = 0; (i < numValuesToPrint) && (i < output.Size()); i++) - { - size_t channel = i % output.NumChannels(); - size_t col = (i / output.NumChannels()) % output.NumColumns(); - size_t row = i / (output.NumChannels() * output.NumColumns()); - - if (i % 10 == 0) os << std::endl; - - if (channel < output.NumChannels() && - (col + _layerParameters.outputPaddingParameters.paddingSize) < output.NumColumns() && - (row + _layerParameters.outputPaddingParameters.paddingSize) < output.NumRows()) - { - const ElementType val = output({ row + _layerParameters.outputPaddingParameters.paddingSize, col + _layerParameters.outputPaddingParameters.paddingSize, channel }); - snprintf(buffer, bufferLength, "%+9.5f ", val); - os << buffer; - } - } - os << std::endl - << "======== End of " << layerName << " ========" << std::endl; - } - - template - void Layer::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["inputPaddingScheme"] << static_cast(_layerParameters.inputPaddingParameters.paddingScheme); - archiver["inputPaddingSize"] << _layerParameters.inputPaddingParameters.paddingSize; - - std::vector outputShape = _layerParameters.outputShape; - archiver["outputShape"] << outputShape; - - archiver["outputPaddingScheme"] << static_cast(_layerParameters.outputPaddingParameters.paddingScheme); - archiver["outputPaddingSize"] << _layerParameters.outputPaddingParameters.paddingSize; - } - - template - void Layer::ReadFromArchive(utilities::Unarchiver& archiver) - { - int inputPaddingScheme; - 
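One subtlety in InitializeOutputValues above (and in GetPaddingValue earlier): for the min padding scheme, signed element types are filled with -max() rather than min(), because for floating-point types numeric_limits::min() is the smallest positive normal value, not the most negative one. A compilable sketch of just that choice:

    // Sketch only: the "min" padding value, matching the branch above.
    #include <iostream>
    #include <limits>
    #include <type_traits>

    template <typename ElementType>
    ElementType MinPaddingValue()
    {
        if (std::is_signed<ElementType>::value)
        {
            // For float/double, min() is tiny-positive; -max() is most negative.
            return -std::numeric_limits<ElementType>::max();
        }
        return std::numeric_limits<ElementType>::min(); // 0 for unsigned types
    }

    int main()
    {
        std::cout << MinPaddingValue<float>() << "\n";    // -3.40282e+38
        std::cout << MinPaddingValue<unsigned>() << "\n"; // 0
    }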
archiver["inputPaddingScheme"] >> inputPaddingScheme; - _layerParameters.inputPaddingParameters.paddingScheme = static_cast(inputPaddingScheme); - archiver["inputPaddingSize"] >> _layerParameters.inputPaddingParameters.paddingSize; - - std::vector outputShape; - archiver["outputShape"] >> outputShape; - math::IntegerTriplet shape; - std::copy(outputShape.begin(), outputShape.end(), shape.begin()); - _layerParameters.outputShape = Shape(shape); - - int outputPaddingScheme; - archiver["outputPaddingScheme"] >> outputPaddingScheme; - _layerParameters.outputPaddingParameters.paddingScheme = static_cast(outputPaddingScheme); - archiver["outputPaddingSize"] >> _layerParameters.outputPaddingParameters.paddingSize; - - _output = TensorType(_layerParameters.outputShape); - - LayerSerializationContext* layerContext = dynamic_cast*>(&archiver.GetContext()); - if (layerContext != nullptr) - { - // Set the input reference to the previously restored layer's output. This is saved in the - // serialization context - _layerParameters.input = layerContext->GetPreviousOutputReference(); - - // Save the output reference to the serialization context - layerContext->SetOutputReference(GetOutput()); - } - - // Set the initial padding - InitializeOutputValues(_output, _layerParameters.outputPaddingParameters); - } - - template - typename Layer::ConstTensorReferenceType Layer::GetInputMinusPadding() - { - auto padding = _layerParameters.inputPaddingParameters.paddingSize; - return _layerParameters.input.GetSubTensor({ padding, padding, 0 }, GetInputShapeMinusPadding()); - } - - template - typename Layer::TensorReferenceType Layer::GetOutputMinusPadding() - { - auto padding = _layerParameters.outputPaddingParameters.paddingSize; - return _output.GetSubTensor({ padding, padding, 0 }, - { _output.NumRows() - 2 * padding, _output.NumColumns() - 2 * padding, _output.NumChannels() }); - } - - template - void Layer::AssignValues(ConstTensorReferenceType& input, TensorReferenceType& output) - { - DEBUG_THROW(input.NumRows() > output.NumRows() || input.NumColumns() > output.NumColumns() || input.NumChannels() > output.NumChannels(), utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "Input tensor must not exceed output tensor dimensions.")); - - for (size_t i = 0; i < input.NumRows(); i++) - { - for (size_t j = 0; j < input.NumColumns(); j++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - output(i, j, k) = input(i, j, k); - } - } - } - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/LeakyReLUActivation.tcc b/libraries/predictors/neural/tcc/LeakyReLUActivation.tcc deleted file mode 100644 index 359f111f4..000000000 --- a/libraries/predictors/neural/tcc/LeakyReLUActivation.tcc +++ /dev/null @@ -1,40 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: LeakyReLUActivation.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ElementType LeakyReLUActivation::Apply(const ElementType input) const - { - return ((input > 0) ? 
input : _leakyFactor * input); - } - - template - void LeakyReLUActivation::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["leakyFactor"] << _leakyFactor; - } - - template - void LeakyReLUActivation::ReadFromArchive(utilities::Unarchiver& archiver) - { - archiver["leakyFactor"] >> _leakyFactor; - } - - template - std::unique_ptr> LeakyReLUActivation::Copy() const - { - return std::make_unique>(_leakyFactor); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/MaxPoolingFunction.tcc b/libraries/predictors/neural/tcc/MaxPoolingFunction.tcc deleted file mode 100644 index 06df49aae..000000000 --- a/libraries/predictors/neural/tcc/MaxPoolingFunction.tcc +++ /dev/null @@ -1,37 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MaxPoolingFunction.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - MaxPoolingFunction::MaxPoolingFunction() : - _max(std::numeric_limits::lowest()) - { - } - - template - void MaxPoolingFunction::Accumulate(ElementType input) - { - _max = std::max(_max, input); - } - - template - ElementType MaxPoolingFunction::GetValue() const - { - return _max; - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/MeanPoolingFunction.tcc b/libraries/predictors/neural/tcc/MeanPoolingFunction.tcc deleted file mode 100644 index a79f33ade..000000000 --- a/libraries/predictors/neural/tcc/MeanPoolingFunction.tcc +++ /dev/null @@ -1,38 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MeanPoolingFunction.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - MeanPoolingFunction::MeanPoolingFunction() : - _sum(0), - _numValues(0) - { - } - - template - void MeanPoolingFunction::Accumulate(ElementType input) - { - _sum += input; - _numValues++; - } - - template - ElementType MeanPoolingFunction::GetValue() const - { - return (_sum / (ElementType)_numValues); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/ParametricReLUActivation.tcc b/libraries/predictors/neural/tcc/ParametricReLUActivation.tcc deleted file mode 100644 index e96e2a0a5..000000000 --- a/libraries/predictors/neural/tcc/ParametricReLUActivation.tcc +++ /dev/null @@ -1,58 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ParametricReLUActivation.tcc (neural) -// Authors: Lisa Ong -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ParametricReLUActivation::ParametricReLUActivation(TensorType alpha) : - _alpha(std::move(alpha)) - { - if (_alpha.GetDataPointer() == nullptr) - { - throw 
utilities::InputException(utilities::InputExceptionErrors::nullReference, "alpha tensor has null data field"); - } - } - - template - ElementType ParametricReLUActivation::Apply(const ElementType input) const - { - UNUSED(input); - // we want people to call the ApplyIndex method in this case. - throw utilities::LogicException(utilities::LogicExceptionErrors::notImplemented); - } - - template - ElementType ParametricReLUActivation::ApplyIndex(const ElementType input, const math::IntegerTriplet& index) const - { - return ((input > 0) ? input : _alpha(index) * input); - } - - template - void ParametricReLUActivation::WriteToArchive(utilities::Archiver& archiver) const - { - math::TensorArchiver::Write(_alpha, "alpha", archiver); - } - - template - void ParametricReLUActivation::ReadFromArchive(utilities::Unarchiver& archiver) - { - math::TensorArchiver::Read(_alpha, "alpha", archiver); - } - - template - std::unique_ptr> ParametricReLUActivation::Copy() const - { - return std::make_unique>(_alpha); - } - } // namespace neural -} // namespace predictors -} // namespace ell \ No newline at end of file diff --git a/libraries/predictors/neural/tcc/PoolingLayer.tcc b/libraries/predictors/neural/tcc/PoolingLayer.tcc deleted file mode 100644 index 579198c2d..000000000 --- a/libraries/predictors/neural/tcc/PoolingLayer.tcc +++ /dev/null @@ -1,118 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: PoolingLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - - template class PoolingFunctionType> - PoolingLayer::PoolingLayer(const LayerParameters& layerParameters, PoolingParameters poolingParameters) : - Layer(layerParameters), - _poolingParameters(poolingParameters) - { - } - - template class PoolingFunctionType> - void PoolingLayer::Compute() - { - auto input = GetInput(); - auto output = GetOutputMinusPadding(); - const size_t poolingWindowSize = _poolingParameters.poolingSize; - - for (size_t row = 0; row < output.NumRows(); row++) - { - const size_t startRow = row * _poolingParameters.stride; - for (size_t column = 0; column < output.NumColumns(); column++) - { - const size_t startColumn = column * _poolingParameters.stride; - std::vector> poolingValues(output.NumChannels()); - - for (size_t pool_y = 0; pool_y < poolingWindowSize; pool_y++) - { - for (size_t pool_x = 0; pool_x < poolingWindowSize; pool_x++) - { - for (size_t channel = 0; channel < output.NumChannels(); channel++) - { - // Account for when part of the pooling window falls beyond the pooling region. 
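The window clamping that the comment above introduces is easiest to see in a small standalone case: cells of the pooling window that fall outside the input contribute the pooling function's padding value (for max pooling, its identity element). A sketch with a 3x3 input, 2x2 window, and stride 2 (values illustrative; GetValueAtPadding in the real code supplies the padding contribution):

    // Sketch only: max pooling with out-of-bounds window cells clamped
    // to the padding value, mirroring the bounds check above.
    #include <algorithm>
    #include <iostream>
    #include <limits>

    int main()
    {
        const int in[3][3] = { { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 } };
        const int inRows = 3, inCols = 3;
        const int poolSize = 2, stride = 2, outRows = 2, outCols = 2;
        const int paddingValue = std::numeric_limits<int>::lowest(); // max-pool identity

        for (int r = 0; r < outRows; ++r)
        {
            for (int c = 0; c < outCols; ++c)
            {
                int best = std::numeric_limits<int>::lowest();
                for (int y = 0; y < poolSize; ++y)
                {
                    for (int x = 0; x < poolSize; ++x)
                    {
                        const int row = r * stride + y, col = c * stride + x;
                        const int v = (row < inRows && col < inCols) ? in[row][col]
                                                                     : paddingValue;
                        best = std::max(best, v);
                    }
                }
                std::cout << best << (c + 1 == outCols ? '\n' : ' ');
            }
        } // prints: 5 6 / 8 9
    }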
- size_t inputRow = startRow + pool_y; - size_t inputColumn = startColumn + pool_x; - - if ((inputRow < input.NumRows()) && (inputColumn < input.NumColumns())) - { - poolingValues[channel].Accumulate(input(inputRow, inputColumn, channel)); - } - else - { - poolingValues[channel].Accumulate(poolingValues[channel].GetValueAtPadding()); - } - } - } - } - - for (size_t channel = 0; channel < output.NumChannels(); channel++) - { - output(row, column, channel) = poolingValues[channel].GetValue(); - } - } - } - } - - template class PoolingFunctionType> - bool PoolingLayer::UsesPadding() const - { - const size_t inputDataPaddingSize = GetLayerParameters().inputPaddingParameters.paddingSize; - const auto inputShape = GetInputShapeMinusPadding(); - const auto outputShape = GetOutputShapeMinusPadding(); - const auto inputWidth = inputShape.NumRows(); - const auto outputWidth = outputShape.NumRows(); - const auto stride = _poolingParameters.stride; - const auto poolingSize = _poolingParameters.poolingSize; - - const auto paddedOutputWidth = (inputWidth - 1) / stride + 1; // ceil(inputWidth/stride); - const auto nonPaddedOutputWidth = (inputWidth - poolingSize) / stride + 1; // ceil((inputWidth-windowWidth+1) / stride) - - if (outputWidth == nonPaddedOutputWidth) - { - return false; - } - else if (outputWidth == paddedOutputWidth) - { - return true; - } - else - { - return inputDataPaddingSize != 0; - } - } - - template class PoolingFunctionType> - void PoolingLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - - archiver["poolingSize"] << _poolingParameters.poolingSize; - archiver["stride"] << _poolingParameters.stride; - } - - template class PoolingFunctionType> - void PoolingLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - archiver["poolingSize"] >> _poolingParameters.poolingSize; - archiver["stride"] >> _poolingParameters.stride; - } - - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/ReLUActivation.tcc b/libraries/predictors/neural/tcc/ReLUActivation.tcc deleted file mode 100644 index 38828c464..000000000 --- a/libraries/predictors/neural/tcc/ReLUActivation.tcc +++ /dev/null @@ -1,28 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ReLUActivation.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ElementType ReLUActivation::Apply(const ElementType input) const - { - return ((input > 0) ? 
input : 0); - } - - template - std::unique_ptr> ReLUActivation::Copy() const - { - return std::make_unique>(); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/RegionDetectionLayer.tcc b/libraries/predictors/neural/tcc/RegionDetectionLayer.tcc deleted file mode 100644 index 6d51e4689..000000000 --- a/libraries/predictors/neural/tcc/RegionDetectionLayer.tcc +++ /dev/null @@ -1,147 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: RegionDetectionLayer.tcc (neural) -// Authors: Kern Handa -// -/////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - RegionDetectionLayer::RegionDetectionLayer(const LayerParameters& layerParameters, RegionDetectionParameters regionDetectionParams) : - Base(layerParameters), - _regionDetectionParams(std::move(regionDetectionParams)) - { - if (_regionDetectionParams.numAnchors <= 0) - { - throw std::invalid_argument("regionDetectionParams.numAnchors <= 0"); - } - - if (_regionDetectionParams.width <= 0) - { - throw std::invalid_argument("regionDetectionParams.width <= 0"); - } - - if (_regionDetectionParams.height <= 0) - { - throw std::invalid_argument("regionDetectionParams.height <= 0"); - } - - if (_regionDetectionParams.numBoxesPerCell <= 0) - { - throw std::invalid_argument("regionDetectionParams.numBoxesPerCell <= 0"); - } - - if (_regionDetectionParams.numClasses <= 0) - { - throw std::invalid_argument("regionDetectionParams.numClasses <= 0"); - } - - if (this->_layerParameters.input.NumRows() != (size_t)_regionDetectionParams.width) - { - throw std::invalid_argument("input number of rows doesn't match width in detection parameters"); - } - - if (this->_layerParameters.input.NumColumns() != (size_t)_regionDetectionParams.height) - { - throw std::invalid_argument("input number of columns doesn't match height in detection parameters"); - } - - if (this->_layerParameters.input.NumChannels() != (size_t)(( - (_regionDetectionParams.numAnchors + 1 + _regionDetectionParams.numClasses) * - _regionDetectionParams.numBoxesPerCell))) - { - throw std::invalid_argument("input number of channels doesn't match box size * number of boxes in detection parameters"); - } - } - - template - void RegionDetectionLayer::Compute() - { - auto output = this->GetOutputMinusPadding(); - auto& input = this->_layerParameters.input; - auto numAnchors = _regionDetectionParams.numAnchors; - - assert(output.GetShape() == input.GetShape()); - - // The input has the shape of width x height x ((5 + classes) * numBoxes) - // Each "cell" in the third dimension has the format - // [tx, ty, tw, th, tc, class probabilities...] for each "box". - // The first four (tx, ty, tw, th) are coordinates that define - // the bounding box for the region where the network thinks an - // object might be. tc is the confidence on the presence of an - // object at all, and should be considered the scale of the - // class probabilities. 
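The comment above fully specifies the per-box transform that the loop below performs: the four coordinates pass through unchanged, the confidence tc gets a sigmoid, and the class scores get a max-shifted softmax when applySoftmax is set. A standalone sketch over one flat box record (layout [tx, ty, tw, th, tc, class scores...]; values made up):

    // Sketch only: sigmoid on the confidence, stabilized softmax on the
    // class scores, coordinates untouched.
    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    int main()
    {
        const int numAnchors = 4, numClasses = 3;
        // One box: [tx, ty, tw, th, tc, c0, c1, c2]
        std::vector<double> box = { 0.1, 0.2, 0.3, 0.4, 1.5, 2.0, 1.0, 0.0 };

        // Sigmoid on the confidence, immediately after the anchor coordinates.
        box[numAnchors] = 1.0 / (1.0 + std::exp(-box[numAnchors]));

        // Softmax over the class scores, shifted by the max for stability.
        double maxVal = box[numAnchors + 1];
        for (int i = 1; i < numClasses; ++i)
            maxVal = std::max(maxVal, box[numAnchors + 1 + i]);
        double sum = 0;
        for (int i = 0; i < numClasses; ++i)
        {
            box[numAnchors + 1 + i] = std::exp(box[numAnchors + 1 + i] - maxVal);
            sum += box[numAnchors + 1 + i];
        }
        for (int i = 0; i < numClasses; ++i) box[numAnchors + 1 + i] /= sum;

        for (double v : box) std::cout << v << " ";
        std::cout << "\n";
    }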
- - SigmoidActivation sigmoid; - SoftMaxActivation softmax; - - for (int i = 0; i < _regionDetectionParams.width; ++i) - { - for (int j = 0; j < _regionDetectionParams.height; ++j) - { - auto outputChannelVector = output.template GetSlice(i, j); - auto inputChannelVector = input.template GetSlice(i, j); - - for (int k = 0; k < _regionDetectionParams.numBoxesPerCell; ++k) - { - auto boxOffset = k * (numAnchors + 1 + _regionDetectionParams.numClasses); - - // Get the vector for the anchors for both output and input - auto outputAnchors = outputChannelVector.GetSubVector(boxOffset, numAnchors); - auto inputAnchors = inputChannelVector.GetSubVector(boxOffset, numAnchors); - - // Copy input over to output - outputAnchors.CopyFrom(inputAnchors); - - // Apply sigmoid to the confidence value, which is immediately after the anchor points - outputChannelVector[boxOffset + numAnchors] = sigmoid(inputChannelVector[boxOffset + numAnchors]); - - // Get the vector for the class probabilities for both output and input - auto outputClassProbabilities = outputChannelVector.GetSubVector(boxOffset + numAnchors + 1, _regionDetectionParams.numClasses); - auto inputClassProbabilities = inputChannelVector.GetSubVector(boxOffset + numAnchors + 1, _regionDetectionParams.numClasses); - - // Copy input over to output - outputClassProbabilities.CopyFrom(inputClassProbabilities); - - if (_regionDetectionParams.applySoftmax) - { - // Apply softmax to probabilities - softmax(outputClassProbabilities); - } - } - } - } - } - - template - void RegionDetectionLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - - archiver["width"] << _regionDetectionParams.width; - archiver["height"] << _regionDetectionParams.height; - archiver["numBoxesPerCell"] << _regionDetectionParams.numBoxesPerCell; - archiver["numClasses"] << _regionDetectionParams.numClasses; - archiver["numCoordinates"] << _regionDetectionParams.numAnchors; - } - - template - void RegionDetectionLayer::ReadFromArchive(utilities::Unarchiver& unarchiver) - { - Layer::ReadFromArchive(unarchiver); - - unarchiver["width"] >> _regionDetectionParams.width; - unarchiver["height"] >> _regionDetectionParams.height; - unarchiver["numBoxesPerCell"] >> _regionDetectionParams.numBoxesPerCell; - unarchiver["numClasses"] >> _regionDetectionParams.numClasses; - unarchiver["numCoordinates"] >> _regionDetectionParams.numAnchors; - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/ScalingLayer.tcc b/libraries/predictors/neural/tcc/ScalingLayer.tcc deleted file mode 100644 index 9667a08c5..000000000 --- a/libraries/predictors/neural/tcc/ScalingLayer.tcc +++ /dev/null @@ -1,49 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ScalingLayer.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ScalingLayer::ScalingLayer(const LayerParameters& layerParameters, const VectorType& scales) : - Layer(layerParameters), - _scales(scales) - { - } - - template - void ScalingLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto& input = _layerParameters.input; - - AssignValues(input, output); - math::ScaleUpdate(_scales, output); - } - - template - void 
ScalingLayer::WriteToArchive(utilities::Archiver& archiver) const - { - Layer::WriteToArchive(archiver); - - math::VectorArchiver::Write(_scales, "scales", archiver); - } - - template - void ScalingLayer::ReadFromArchive(utilities::Unarchiver& archiver) - { - Layer::ReadFromArchive(archiver); - - math::VectorArchiver::Read(_scales, "scales", archiver); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/SigmoidActivation.tcc b/libraries/predictors/neural/tcc/SigmoidActivation.tcc deleted file mode 100644 index e3b9b3290..000000000 --- a/libraries/predictors/neural/tcc/SigmoidActivation.tcc +++ /dev/null @@ -1,41 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SigmoidActivation.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ElementType SigmoidActivation::Apply(const ElementType input) const - { - ElementType output; - if (input >= 0.0) - { - double exp_value = std::exp(-input); - output = static_cast(1.0 / (1.0 + exp_value)); - } - else - { - double exp_value = std::exp(input); - output = static_cast(exp_value / (1.0 + exp_value)); - } - return output; - } - - template - std::unique_ptr> SigmoidActivation::Copy() const - { - return std::make_unique>(); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/SoftMaxActivation.tcc b/libraries/predictors/neural/tcc/SoftMaxActivation.tcc deleted file mode 100644 index 5bd2d68b7..000000000 --- a/libraries/predictors/neural/tcc/SoftMaxActivation.tcc +++ /dev/null @@ -1,51 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SoftMaxActivation.tcc (neural) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - void SoftMaxActivation::Apply(math::ColumnVectorReference& input) const - { - ElementType maxVal = std::numeric_limits::lowest(); - for (size_t i = 0; i < input.Size(); ++i) - { - maxVal = std::max(maxVal, input[i]); - } - - ElementType sum = 0; - for (size_t i = 0; i < input.Size(); ++i) - { - const auto eulerVal = static_cast(std::exp(input[i] - maxVal)); - input[i] = eulerVal; - sum += eulerVal; - } - - const ElementType epsilon = static_cast(1e-7); - if (sum < epsilon) - { - sum = 1.0; - } - - input.Transform([sum](ElementType value) { return value / sum; }); - } - - template - void SoftMaxActivation::operator()(math::ColumnVectorReference& input) const - { - return Apply(input); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/SoftmaxLayer.tcc b/libraries/predictors/neural/tcc/SoftmaxLayer.tcc deleted file mode 100644 index ed07e2f05..000000000 --- a/libraries/predictors/neural/tcc/SoftmaxLayer.tcc +++ /dev/null @@ -1,82 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SoftmaxLayer.tcc (neural) -// Authors: Byron 
Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace predictors -{ - namespace neural - { - - template - SoftmaxLayer::SoftmaxLayer(const LayerParameters& layerParameters) : - Layer(layerParameters) - { - if (_layerParameters.input.Size() != GetOutputMinusPadding().Size()) - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, GetRuntimeTypeName() + ": Expected size of input and output tensor (minus padding) to match"); - } - } - - template - void SoftmaxLayer::Compute() - { - auto output = GetOutputMinusPadding(); - auto& input = _layerParameters.input; - - AssignValues(input, output); - - ElementType sum = 0; - ElementType maxValue = std::numeric_limits::lowest(); - - // Find the max - for (size_t i = 0; i < input.NumRows(); i++) - { - for (size_t j = 0; j < input.NumColumns(); j++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - ElementType value = input(i, j, k); - maxValue = std::max(maxValue, value); - } - } - } - - // Use the max to calculate the Euler value - for (size_t i = 0; i < input.NumRows(); i++) - { - for (size_t j = 0; j < input.NumColumns(); j++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - ElementType value = input(i, j, k); - ElementType eulerVal = std::exp(value - maxValue); - output(i, j, k) = eulerVal; - sum += eulerVal; - } - } - } - - // Divide the value by the sum. After this, the sum of all values will be 1.0 - for (size_t i = 0; i < input.NumRows(); i++) - { - for (size_t j = 0; j < input.NumColumns(); j++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - output(i, j, k) /= sum; - } - } - } - } - - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/neural/tcc/TanhActivation.tcc b/libraries/predictors/neural/tcc/TanhActivation.tcc deleted file mode 100644 index bca7348d4..000000000 --- a/libraries/predictors/neural/tcc/TanhActivation.tcc +++ /dev/null @@ -1,28 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: TanhActivation.tcc (neural) -// Authors: James Devine -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - namespace neural - { - template - ElementType TanhActivation::Apply(const ElementType input) const - { - return std::tanh(input); - } - - template - std::unique_ptr> TanhActivation::Copy() const - { - return std::make_unique>(); - } - } // namespace neural -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/tcc/ForestPredictor.tcc b/libraries/predictors/tcc/ForestPredictor.tcc deleted file mode 100644 index 3b1883451..000000000 --- a/libraries/predictors/tcc/ForestPredictor.tcc +++ /dev/null @@ -1,413 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ForestPredictor.tcc (predictors) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace predictors -{ - template - ForestPredictor::SplittableNodeId::SplittableNodeId(size_t parentNodeIndex, size_t childPosition) : - _isRoot(false), - _parentNodeIndex(parentNodeIndex), - _childPosition(childPosition) - { - 
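SigmoidActivation::Apply above branches on the sign of the input so that std::exp is only ever called with a non-positive argument; both branches are algebraically 1 / (1 + e^-x), but the branching keeps the intermediate exp from overflowing for large-magnitude negative inputs. A minimal sketch of the same trick:

    // Sketch only: numerically stable sigmoid, as in the branch above.
    #include <cassert>
    #include <cmath>

    double StableSigmoid(double x)
    {
        if (x >= 0)
        {
            const double e = std::exp(-x); // argument <= 0, so e <= 1
            return 1.0 / (1.0 + e);
        }
        const double e = std::exp(x);      // x < 0, so e < 1
        return e / (1.0 + e);
    }

    int main()
    {
        assert(StableSigmoid(0) == 0.5);
        assert(StableSigmoid(-1000) >= 0); // naive form overflows exp(1000)
        assert(StableSigmoid(1000) <= 1);
    }

SoftmaxLayer::Compute above applies the analogous stabilization, subtracting the maximum before exponentiating so that the largest exponent is exactly zero.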
} - - template - ForestPredictor::SplitAction::SplitAction(SplittableNodeId nodeId, SplitRuleType _splitRule, std::vector edgePredictors) : - _nodeId(std::move(nodeId)), - _splitRule(std::move(_splitRule)), - _edgePredictors(std::move(edgePredictors)) - { - } - - template - ForestPredictor::Edge::Edge(const EdgePredictorType& predictor) : - _predictor(predictor), - _targetNodeIndex(0) - { - } - - template - void ForestPredictor::Edge::SetTargetNodeIndex(size_t targetNodeIndex) - { - _targetNodeIndex = targetNodeIndex; - } - - template - bool ForestPredictor::IsTrivial() const - { - if (_rootIndices.size() == 0 && _bias == 0.0) - { - return true; - } - else - { - return false; - } - } - - template - size_t ForestPredictor::NumInteriorNodes(size_t interiorNodeIndex) const - { - if (interiorNodeIndex >= _interiorNodes.size()) - { - return 0; - } - - auto const& interiorNode = _interiorNodes[interiorNodeIndex]; - size_t numInteriorNodes = 1; - - for (const auto& edge : interiorNode._outgoingEdges) - { - if (edge.IsTargetInterior()) - { - numInteriorNodes += NumInteriorNodes(edge.GetTargetNodeIndex()); - } - } - - return numInteriorNodes; - } - - template - size_t ForestPredictor::NumEdges(size_t interiorNodeIndex) const - { - if (interiorNodeIndex >= _interiorNodes.size()) - { - return 0; - } - - auto const& interiorNode = _interiorNodes[interiorNodeIndex]; - size_t numEdges = interiorNode._outgoingEdges.size(); - - for (const auto& edge : interiorNode._outgoingEdges) - { - if (edge.IsTargetInterior()) - { - numEdges += NumEdges(edge.GetTargetNodeIndex()); - } - } - - return numEdges; - } - - template - double ForestPredictor::Predict(const DataVectorType& input) const - { - double output = _bias; - for (auto treeRootIndex : _rootIndices) - { - output += Predict(input, treeRootIndex); - } - return output; - } - - template - double ForestPredictor::Predict(const DataVectorType& input, size_t interiorNodeIndex) const - { - if (interiorNodeIndex >= _interiorNodes.size()) - { - return 0.0; - } - - double output = 0.0; - - VisitEdgePathToLeaf(input, interiorNodeIndex, [&](const InteriorNode& interiorNode, size_t edgePosition) { output += interiorNode._outgoingEdges[edgePosition]._predictor.Predict(input); }); - - return output; - } - - template - std::vector ForestPredictor::GetEdgeIndicatorVector(const DataVectorType& input) const - { - std::vector edgeIndicator(_numEdges); - for (auto treeRootIndex : _rootIndices) - { - SetEdgeIndicatorVector(input, edgeIndicator, treeRootIndex); - } - return edgeIndicator; - } - - template - std::vector ForestPredictor::GetEdgeIndicatorVector(const DataVectorType& input, size_t interiorNodeIndex) const - { - std::vector edgeIndicator(_numEdges); - SetEdgeIndicatorVector(input, edgeIndicator, interiorNodeIndex); - return edgeIndicator; - } - - template - size_t ForestPredictor::NumChildren(size_t interiorNodeIndex) const - { - if (interiorNodeIndex >= _interiorNodes.size()) - { - return 0; - } - return _interiorNodes[interiorNodeIndex]._outgoingEdges.size(); - } - - template - typename ForestPredictor::SplittableNodeId ForestPredictor::GetChildId(size_t parentNodeIndex, size_t childPosition) const - { - // check that the parent exists - if (parentNodeIndex >= _interiorNodes.size()) - { - throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "invalid identifier requested - parent does not exist"); - } - - // check that the splittable node exists - if (childPosition >= _interiorNodes[parentNodeIndex]._outgoingEdges.size()) - { - throw 
utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "invalid identifier requested - child does not exist"); - } - - return SplittableNodeId(parentNodeIndex, childPosition); - } - - template - size_t ForestPredictor::Split(const SplitAction& splitAction) - { - if (splitAction._nodeId._isRoot) - { - // add interior Node - size_t interiorNodeIndex = AddInteriorNode(splitAction); - - // add new tree - _rootIndices.push_back(interiorNodeIndex); - - // return ID of new root - return interiorNodeIndex; - } - else - { - // check that this node wasn't previously split - auto& incomingEdge = _interiorNodes[splitAction._nodeId._parentNodeIndex]._outgoingEdges[splitAction._nodeId._childPosition]; - if (incomingEdge.IsTargetInterior()) - { - throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "invalid split in decision tree - node previously split"); - } - - // add interior Node - size_t interiorNodeIndex = AddInteriorNode(splitAction); - - // update the parent about the new interior node - incomingEdge.SetTargetNodeIndex(interiorNodeIndex); - - // return ID of new interior node - return interiorNodeIndex; - } - } - - template - void ForestPredictor::AddToBias(double value) - { - _bias += value; - } - - template - void ForestPredictor::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["interiorNodes"] << _interiorNodes; - archiver["rootIndices"] << _rootIndices; - archiver["bias"] << _bias; - archiver["numEdges"] << _numEdges; - } - - template - void ForestPredictor::ReadFromArchive(utilities::Unarchiver& archiver) - { - archiver["interiorNodes"] >> _interiorNodes; - archiver["rootIndices"] >> _rootIndices; - archiver["bias"] >> _bias; - archiver["numEdges"] >> _numEdges; - } - - template - void ForestPredictor::SetEdgeIndicatorVector(const DataVectorType& input, std::vector& output, size_t interiorNodeIndex) const - { - if (interiorNodeIndex >= _interiorNodes.size()) - { - return; - } - VisitEdgePathToLeaf(input, interiorNodeIndex, [&output](const InteriorNode& interiorNode, size_t edgePosition) { output[interiorNode._firstEdgeIndex + edgePosition] = true; }); - } - - template - size_t ForestPredictor::AddInteriorNode(const SplitAction& splitAction) - { - size_t numEdges = splitAction._edgePredictors.size(); - - // check correctness of splitAction - if (numEdges != splitAction._splitRule.NumOutputs()) - { - throw utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "invalid split in decision tree - number of split rule outputs doesn't match fan-out"); - } - - // get indices - size_t interiorNodeIndex = _interiorNodes.size(); - - // create the new interior node - InteriorNode interiorNode(splitAction, _numEdges); - _interiorNodes.push_back(std::move(interiorNode)); - - // increment global edge count - _numEdges += numEdges; - - return interiorNodeIndex; - } - - template - void ForestPredictor::VisitEdgePathToLeaf(const DataVectorType& input, size_t interiorNodeIndex, std::function operation) const - { - size_t nodeIndex = interiorNodeIndex; - - do - { - const auto& interiorNode = _interiorNodes[nodeIndex]; - - // which way do we go? 
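// Review note: the split rule returns the index of the outgoing edge to
// follow; a negative value means "cannot route this input", which the code
// below treats as an early exit. The loop's termination test relies on the
// convention that node 0 is always the first (root) interior node, so no
// edge ever legitimately targets index 0 and a zero target marks a leaf.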
- int edgePosition = static_cast(interiorNode._splitRule.Predict(input)); - - // check for early eject - if (edgePosition < 0) - { - break; - } - - // apply the operation - operation(interiorNode, edgePosition); - - //follow the edge to the next node - const auto& edge = interiorNode._outgoingEdges[edgePosition]; - nodeIndex = edge.GetTargetNodeIndex(); - } while (nodeIndex != 0); - } - - // - // InteriorNode - // - template - ForestPredictor::InteriorNode::InteriorNode(const SplitAction& splitAction, size_t _firstEdgeIndex) : - _splitRule(splitAction._splitRule), - _firstEdgeIndex(_firstEdgeIndex) - { - std::copy(splitAction._edgePredictors.begin(), splitAction._edgePredictors.end(), std::back_inserter(_outgoingEdges)); - } - - template - void ForestPredictor::InteriorNode::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["splitRule"] << _splitRule; - archiver["outgoingEdges"] << _outgoingEdges; - archiver["firstEdgeIndex"] << _firstEdgeIndex; - } - - template - void ForestPredictor::InteriorNode::ReadFromArchive(utilities::Unarchiver& archiver) - { - archiver["splitRule"] >> _splitRule; - archiver["outgoingEdges"] >> _outgoingEdges; - archiver["firstEdgeIndex"] >> _firstEdgeIndex; - } - - // - // debugging code - // - - template - void ForestPredictor::SplittableNodeId::Print(std::ostream& os) const - { - if (_isRoot) - { - os << "root"; - } - else - { - os << "child " << _childPosition << " of node " << _parentNodeIndex; - } - } - - template - void ForestPredictor::SplitAction::PrintLine(std::ostream& os, size_t tabs) const - { - os << std::string(tabs * 4, ' ') << "action = split "; - _nodeId.Print(os); - os << "\n"; - - os << std::string(tabs * 4, ' ') << "rule:\n"; - _splitRule.PrintLine(os, tabs + 1); - - os << std::string(tabs * 4, ' ') << "edge predictors:\n"; - for (const auto& predictor : _edgePredictors) - { - predictor.PrintLine(os, tabs + 1); - } - } - - // - // debugging members - // - - template - void ForestPredictor::PrintLine(std::ostream& os, size_t tabs) const - { - os << std::string(tabs * 4, ' ') << "Forest Predictor: bias = " << _bias << "\n"; - for (const auto& interiorNode : _interiorNodes) - { - interiorNode.PrintLine(os, tabs + 1); - } - for (auto treeRootIndex : _rootIndices) - { - os << std::string(tabs * 4, ' ') << "Tree: root index = " << treeRootIndex << "\n"; - } - } - - template - void ForestPredictor::InteriorNode::PrintLine(std::ostream& os, size_t tabs) const - { - os << std::string(tabs * 4, ' ') << "InteriorNode:\n"; - _splitRule.PrintLine(os, tabs + 1); - for (const auto& edge : _outgoingEdges) - { - edge.PrintLine(os, tabs + 1); - } - } - // - // Edge - // - template - void ForestPredictor::Edge::PrintLine(std::ostream& os, size_t tabs) const - { - os << std::string(tabs * 4, ' ') << "Edge:\n"; - _predictor.PrintLine(os, tabs + 1); - os << std::string(tabs * 4, ' ') << "Target node index = " << _targetNodeIndex << "\n"; - } - - template - bool ForestPredictor::Edge::IsTargetInterior() const - { - return _targetNodeIndex == 0 ? 
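// Review nit: the ternary below reads more directly as
// `return _targetNodeIndex != 0;`, using the same zero-index leaf sentinel
// described above.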
false : true; - } - - template - void ForestPredictor::Edge::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["predictor"] << _predictor; - archiver["targetNodeIndex"] << _targetNodeIndex; - } - - template - void ForestPredictor::Edge::ReadFromArchive(utilities::Unarchiver& archiver) - { - archiver["predictor"] >> _predictor; - archiver["targetNodeIndex"] >> _targetNodeIndex; - } -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/tcc/LinearPredictor.tcc b/libraries/predictors/tcc/LinearPredictor.tcc deleted file mode 100644 index e4eb8b7a3..000000000 --- a/libraries/predictors/tcc/LinearPredictor.tcc +++ /dev/null @@ -1,96 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: LinearPredictor.tcc (predictors) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -#include - -#include - -namespace ell -{ -namespace predictors -{ - template - LinearPredictor::LinearPredictor(size_t dim) : - _w(dim), - _b(0) - { - } - - template - LinearPredictor::LinearPredictor(const math::ColumnVector& weights, ElementType bias) : - _w(weights), - _b(bias) - { - } - - template - template - LinearPredictor::LinearPredictor(const LinearPredictor& other) : - _b(other.GetBias()) - { - auto weights = other.GetWeights(); - _w.Resize(weights.Size()); - for (size_t i = 0; i < weights.Size(); ++i) - { - _w[i] = static_cast(weights[i]); - } - } - - template - void LinearPredictor::Reset() - { - _w.Reset(); - _b = 0; - } - - template - void LinearPredictor::Resize(size_t size) - { - _w.Resize(size); - } - - template - ElementType LinearPredictor::Predict(const DataVectorType& dataVector) const - { - return _w * dataVector + _b; - } - - template - auto LinearPredictor::GetWeightedElements(const DataVectorType& dataVector) const -> DataVectorType - { - auto transformation = [&](data::IndexValue indexValue) -> ElementType { return indexValue.value * _w[indexValue.index]; }; - return dataVector.TransformAs(transformation); - } - - template - void LinearPredictor::Scale(ElementType scalar) - { - _w *= scalar; - _b *= scalar; - } - - template - void LinearPredictor::WriteToArchive(utilities::Archiver& archiver) const - { - auto w = _w.ToArray(); - archiver["w"] << w; - archiver["b"] << _b; - } - - template - void LinearPredictor::ReadFromArchive(utilities::Unarchiver& archiver) - { - std::vector w; - archiver["w"] >> w; - _w = math::ColumnVector(std::move(w)); - archiver["b"] >> _b; - } -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/tcc/NeuralNetworkPredictor.tcc b/libraries/predictors/tcc/NeuralNetworkPredictor.tcc deleted file mode 100644 index 0a4eeaa9e..000000000 --- a/libraries/predictors/tcc/NeuralNetworkPredictor.tcc +++ /dev/null @@ -1,210 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: NeuralNetworkPredictor.tcc (predictors) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -//stl -#include - -namespace ell -{ -namespace predictors -{ - constexpr utilities::ArchiveVersion c_currentNeuralNetworkPredictorArchiveVersion = { utilities::ArchiveVersionNumbers::v1 }; - - template - 
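// Review note: the deleted LinearPredictor (previous hunk) is an affine
// map, Predict(x) = w . x + b, with GetWeightedElements returning the
// elementwise products w[i] * x[i]. A self-contained sketch of that
// contract on dense vectors (illustrative helper; the real class also
// accepts sparse data vectors):

#include <cassert>
#include <vector>

double PredictAffine(const std::vector<double>& w, double b, const std::vector<double>& x)
{
    assert(w.size() == x.size());
    double sum = b;
    for (size_t i = 0; i < w.size(); ++i)
    {
        sum += w[i] * x[i]; // dot product plus bias
    }
    return sum;
}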
NeuralNetworkPredictor::NeuralNetworkPredictor(InputLayerReference&& inputLayer, Layers&& layers) : - _inputLayer(std::move(inputLayer)), - _layers(std::move(layers)), - _output(_layers.back()->GetOutput().Size()) - { - } - - template - void NeuralNetworkPredictor::RemoveLastLayers(size_t numberToRemove) - { - if (_layers.size() > numberToRemove) - { - _layers.resize(_layers.size() - numberToRemove); - _output.resize(_layers.back()->GetOutput().Size()); - } - else - { - throw utilities::InputException(utilities::InputExceptionErrors::sizeMismatch, "RemoveLastLayers numberToRemove exceeds number of layers."); - } - } - - template - typename NeuralNetworkPredictor::Shape NeuralNetworkPredictor::GetInputShape() const - { - if (_inputLayer != nullptr) - { - return _inputLayer->GetInputShape(); - } - return { 0, 0, 0 }; - } - - template - typename NeuralNetworkPredictor::Shape NeuralNetworkPredictor::GetOutputShape() const - { - if (_layers.size() > 0) - { - return _layers.back()->GetOutputShape(); - } - return { 0, 0, 0 }; - } - - template - const std::vector& NeuralNetworkPredictor::Predict(const DataVectorType& dataVector) const - { - if (_inputLayer != nullptr) - { - _inputLayer->SetInput(dataVector); - _inputLayer->Compute(); - } - Compute(); - return _output; - } - - template - const std::vector& NeuralNetworkPredictor::Predict(const std::vector& input) const - { - if (_inputLayer != nullptr) - { - _inputLayer->SetInput(input); - _inputLayer->Compute(); - } - Compute(); - return _output; - } - - template - void NeuralNetworkPredictor::Compute() const - { - // Forward feed inputs through the layers - for (size_t i = 0; i < _layers.size(); i++) - { - _layers[i]->Compute(); - // Uncomment the following line to print layer info - //_layers[i]->Print(std::cout); - } - - if (_layers.size() > 0) - { - auto output = _layers.back()->GetOutput(); - size_t vectorIndex = 0; - - //_output.resize(output.NumElements()); - for (size_t i = 0; i < output.NumRows(); i++) - { - for (size_t j = 0; j < output.NumColumns(); j++) - { - for (size_t k = 0; k < output.NumChannels(); k++) - { - _output[vectorIndex++] = output(i, j, k); - } - } - } - } - else - { - _output.assign(_output.size(), 0); - } - } - - template - void NeuralNetworkPredictor::Reset() - { - for (size_t i = 0; i < _layers.size(); i++) - { - _layers[i]->Reset(); - } - } - - template - void NeuralNetworkPredictor::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["inputLayer"] << _inputLayer.get(); - - std::vector*> layerElements; - for (size_t i = 0; i < _layers.size(); i++) - { - layerElements.emplace_back(_layers[i].get()); - } - archiver["layers"] << layerElements; - archiver["output"] << _output; - } - - template - void NeuralNetworkPredictor::ReadFromArchive(utilities::Unarchiver& archiver) - { - neural::LayerSerializationContext layerContext(archiver.GetContext()); - archiver.PushContext(layerContext); - - std::unique_ptr> inputLayer; - archiver["inputLayer"] >> inputLayer; - _inputLayer = std::move(inputLayer); - - std::vector*> layerElements; - archiver["layers"] >> layerElements; - _layers.resize(layerElements.size()); - for (size_t i = 0; i < layerElements.size(); i++) - { - _layers[i].reset((neural::Layer*)layerElements[i]); - } - archiver["output"] >> _output; - - archiver.PopContext(); - } - - template - void NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(utilities::SerializationContext& context) - { - using namespace ell::predictors::neural; - - context.GetTypeFactory().AddType, neural::InputLayer>(); 
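// Review notes on NeuralNetworkPredictor above: Compute flattens the last
// layer's output in (row, column, channel) order and relies on _output
// having been sized in the constructor, so the commented-out resize is
// dead code that could simply be dropped. In ReadFromArchive, the C-style
// (neural::Layer*) cast would be clearer as a static_cast.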
- context.GetTypeFactory().AddType, neural::ActivationLayer>(); - context.GetTypeFactory().AddType, neural::BatchNormalizationLayer>(); - context.GetTypeFactory().AddType, neural::BiasLayer>(); - context.GetTypeFactory().AddType, neural::BinaryConvolutionalLayer>(); - context.GetTypeFactory().AddType, neural::ConvolutionalLayer>(); - context.GetTypeFactory().AddType, neural::FullyConnectedLayer>(); - context.GetTypeFactory().AddType, neural::PoolingLayer>(); - context.GetTypeFactory().AddType, neural::PoolingLayer>(); - context.GetTypeFactory().AddType, neural::RegionDetectionLayer>(); - context.GetTypeFactory().AddType, neural::ScalingLayer>(); - context.GetTypeFactory().AddType, neural::SoftmaxLayer>(); - context.GetTypeFactory().AddType, NeuralNetworkPredictor>(); - - // Map the old type names to the new ones for compatibility reasons. - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer>"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer>"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - context.GetTypeFactory().AddType, neural::ActivationLayer>("ActivationLayer"); - } - - template - utilities::ArchiveVersion NeuralNetworkPredictor::GetCurrentArchiveVersion() - { - return c_currentNeuralNetworkPredictorArchiveVersion; - } - - template - utilities::ArchiveVersion NeuralNetworkPredictor::GetArchiveVersion() const - { - return GetCurrentArchiveVersion(); - } -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/tcc/Normalizer.tcc b/libraries/predictors/tcc/Normalizer.tcc deleted file mode 100644 index 4817cf953..000000000 --- a/libraries/predictors/tcc/Normalizer.tcc +++ /dev/null @@ -1,32 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Normalizer.tcc (predictors) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - template - inline Normalizer::Normalizer(TransformationType transformation) : - _transformation(transformation) - { - } - - template - template - OutputDataVectorType Normalizer::Compute(const InputDataVectorType& input) const - { - return data::TransformAs(input, _transformation); - } - - template - Normalizer MakeTransformationNormalizer(TransformationType transformation) - { - return Normalizer(transformation); - } -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/tcc/SignPredictor.tcc b/libraries/predictors/tcc/SignPredictor.tcc deleted file mode 100644 index a03cf7cac..000000000 --- a/libraries/predictors/tcc/SignPredictor.tcc +++ /dev/null @@ -1,56 +0,0 @@ 
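// Review note: the deleted SignPredictor below simply thresholds the
// wrapped predictor's real-valued score at zero. The entire prediction
// contract, sketched over any type with a numeric Predict (illustrative
// helper, not part of the library):

template <typename PredictorType, typename DataVectorType>
bool PredictSign(const PredictorType& predictor, const DataVectorType& input)
{
    return predictor.Predict(input) > 0; // positive score => positive class
}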
-//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: PredictorOutputAdapter.tcc (predictors) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace predictors -{ - template - SignPredictor MakeSignPredictor(PredictorType predictor) - { - return SignPredictor(std::move(predictor)); - } - - template - SignPredictor::SignPredictor(PredictorType predictor) : - _predictor(std::move(predictor)) - { - } - - template - PredictorType& SignPredictor::GetPredictor() - { - return _predictor; - } - - template - const PredictorType& SignPredictor::GetPredictor() const - { - return _predictor; - } - - template - bool SignPredictor::Predict(const DataVectorType& dataVector) const - { - auto prediction = _predictor.Predict(dataVector); - return prediction > 0; - } - - template - void SignPredictor::WriteToArchive(utilities::Archiver& archiver) const - { - archiver["predictor"] << _predictor; - } - - template - void SignPredictor::ReadFromArchive(utilities::Unarchiver& archiver) - { - archiver["predictor"] >> _predictor; - } -} // namespace predictors -} // namespace ell diff --git a/libraries/predictors/test/include/LinearPredictorTests.h b/libraries/predictors/test/include/LinearPredictorTests.h index cea05cb66..762915798 100644 --- a/libraries/predictors/test/include/LinearPredictorTests.h +++ b/libraries/predictors/test/include/LinearPredictorTests.h @@ -13,4 +13,30 @@ template void LinearPredictorTest(); -#include "../tcc/LinearPredictorTests.tcc" \ No newline at end of file +#pragma region implementation + +#include + +#include + +using namespace ell; + +template +void LinearPredictorTest() +{ + // test that default constructor works. + predictors::LinearPredictor predictor0; + testing::ProcessTest("DefaultLinearPredictor", testing::IsEqual(predictor0.GetBias(), static_cast(0))); + + // now test a known predictor. 
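// Review note, expected-value check for the test below:
// 1*1 + 2*2 + 3*1 + 4*(-1) + 5*0.5 + 1.5 = 1 + 4 + 3 - 4 + 2.5 + 1.5 = 8.0.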
+ using DataVectorType = typename predictors::LinearPredictor::DataVectorType; + math::ColumnVector weights({ 1, 2, 3, 4, 5 }); + ElementType bias = 1.5; + + predictors::LinearPredictor predictor(weights, bias); + auto result = predictor.Predict(DataVectorType{ 1.0, 2.0, 1.0, -1.0, 0.5 }); + + testing::ProcessTest("TestLinearPredictor", testing::IsEqual(result, static_cast(8.0))); +} + +#pragma endregion implementation diff --git a/libraries/predictors/test/include/NeuralNetworkPredictorTests.h b/libraries/predictors/test/include/NeuralNetworkPredictorTests.h index 4863359ac..dbc986443 100644 --- a/libraries/predictors/test/include/NeuralNetworkPredictorTests.h +++ b/libraries/predictors/test/include/NeuralNetworkPredictorTests.h @@ -59,4 +59,848 @@ void ConvolutionalArchiveTest(); template void BinaryConvolutionalArchiveTest(); -#include "../tcc/NeuralNetworkPredictorTests.tcc" +#pragma region implementation + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +using namespace ell; +using namespace ell::common; + +inline bool Equals(double a, double b) +{ + if (std::abs(a - b) < 0.0001) + return true; + return false; +} + +template +void ActivationTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using TensorType = typename Layer::TensorType; + + TensorType T0(2, 2, 2); + T0(0, 0, 0) = static_cast(1.0); + T0(0, 1, 0) = static_cast(-2.0); + T0(1, 0, 1) = static_cast(3.0); + T0(1, 1, 1) = static_cast(-4.0); + + TensorType T1(2, 2, 2); + + auto hardSigmoid = HardSigmoidActivation(); + for (size_t i = 0; i < T0.NumRows(); ++i) + { + for (size_t j = 0; j < T0.NumColumns(); ++j) + { + for (size_t k = 0; k < T0.NumChannels(); ++k) + { + T1(i, j, k) = hardSigmoid.ApplyIndex(T0(i, j, k), { i, j, k }); + } + } + } + testing::ProcessTest("Testing HardSigmoidActivation", Equals(T1(0, 0, 0), 0.7) && Equals(T1(0, 1, 0), 0.1) && T1(1, 0, 1) == 1 && T1(1, 1, 1) == 0 && T1(0, 0, 1) == 0.5 && T1(0, 1, 1) == 0.5 && T1(1, 0, 0) == 0.5 && T1(1, 1, 0) == 0.5); + + auto relu = ReLUActivation(); + for (size_t i = 0; i < T0.NumRows(); ++i) + { + for (size_t j = 0; j < T0.NumColumns(); ++j) + { + for (size_t k = 0; k < T0.NumChannels(); ++k) + { + T1(i, j, k) = relu.ApplyIndex(T0(i, j, k), { i, j, k }); + } + } + } + testing::ProcessTest("Testing ReLUActivation", T1(0, 0, 0) == 1.0 && T1(0, 1, 0) == 0 && T1(1, 0, 1) == 3.0 && T1(1, 1, 1) == 0); + + auto leakyRelu = LeakyReLUActivation(static_cast(0.1)); + for (size_t i = 0; i < T0.NumRows(); ++i) + { + for (size_t j = 0; j < T0.NumColumns(); ++j) + { + for (size_t k = 0; k < T0.NumChannels(); ++k) + { + T1(i, j, k) = leakyRelu.ApplyIndex(T0(i, j, k), { i, j, k }); + } + } + } + testing::ProcessTest("Testing LeakyReLUActivation", Equals(T1(0, 0, 0), 1.0) && Equals(T1(0, 1, 0), -0.2) && Equals(T1(1, 0, 1), 3.0) && Equals(T1(1, 1, 1), -0.4)); + + TensorType alpha(2, 2, 2); + alpha(0, 0, 0) = static_cast(0.1); + alpha(0, 1, 0) = static_cast(0.2); + alpha(1, 0, 1) = static_cast(0.3); + alpha(1, 1, 1) = static_cast(0.4); + + auto parametricRelu = ParametricReLUActivation(alpha); + for (size_t i = 0; i < T0.NumRows(); ++i) + { + for (size_t j = 0; j < T0.NumColumns(); ++j) + { + for (size_t k = 0; k < T0.NumChannels(); ++k) + { + T1(i, j, k) = parametricRelu.ApplyIndex(T0(i, j, k), { i, j, k }); + } + } + } + testing::ProcessTest("Testing ParametricReLUActivation", Equals(T1(0, 0, 0), 1.0) && Equals(T1(0, 1, 0), -0.4) && Equals(T1(1, 0, 1), 3.0) && 
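// Review note: the activation expectations above are consistent with hard
// sigmoid h(x) = clamp(0.2 * x + 0.5, 0, 1), so h(1) = 0.7, h(-2) = 0.1,
// h(3) saturates to 1, h(-4) to 0, and untouched zeros map to 0.5; leaky
// ReLU scales negatives by 0.1 (-2 -> -0.2, -4 -> -0.4); parametric ReLU
// scales negatives by the per-element alpha (-2 * 0.2 = -0.4,
// -4 * 0.4 = -1.6).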
Equals(T1(1, 1, 1), -1.6)); + + auto sigmoid = SigmoidActivation(); + for (size_t i = 0; i < T0.NumRows(); ++i) + { + for (size_t j = 0; j < T0.NumColumns(); ++j) + { + for (size_t k = 0; k < T0.NumChannels(); ++k) + { + T1(i, j, k) = sigmoid.ApplyIndex(T0(i, j, k), { i, j, k }); + } + } + } + testing::ProcessTest("Testing SigmoidActivation", Equals(T1(0, 0, 0), 0.73106) && Equals(T1(0, 1, 0), 0.11920) && Equals(T1(1, 0, 1), 0.95257) && Equals(T1(1, 1, 1), 0.017986)); +} + +template +void LayerBaseTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + + // Verify LayerBase + TensorType input0(12, 12, 3); + PaddingParameters paddingParameters2{ PaddingScheme::alternatingZeroAndOnes, 1 }; + Shape outputShape = { 12, 12, 6 }; + LayerParameters layerParameters{ input0, ZeroPadding(1), outputShape, paddingParameters2 }; + + Layer baseLayer(layerParameters); + auto layerBaseOutput = baseLayer.GetOutput(); + testing::ProcessTest("Testing LayerBase, output tensor", layerBaseOutput.NumRows() == 12 && layerBaseOutput.NumColumns() == 12 && layerBaseOutput.NumChannels() == 6); + testing::ProcessTest("Testing LayerBase, output tensor padding values", layerBaseOutput(0, 0, 0) == 0 && layerBaseOutput(0, 1, 0) == 1 && layerBaseOutput(0, 2, 0) == 0 && layerBaseOutput(0, 3, 0) == 1); +} + +template +void ActivationLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + + // Verify ActivationLayer + TensorType activationInput(2, 2, 2); + activationInput(0, 0, 0) = 1.0; + activationInput(0, 1, 0) = -2.0; + activationInput(1, 0, 1) = 3.0; + activationInput(1, 1, 1) = -4.0; + Shape activationOutputShape = { 4, 4, 2 }; + LayerParameters activationParameters{ activationInput, NoPadding(), activationOutputShape, ZeroPadding(1) }; + + ActivationLayer activationLayer(activationParameters, new ReLUActivation()); + activationLayer.Compute(); + auto output0 = activationLayer.GetOutput(); + testing::ProcessTest("Testing ActivationLayer, values", output0(1, 1, 0) == 1.0 && output0(1, 2, 0) == 0 && output0(2, 1, 1) == 3.0 && output0(2, 2, 1) == 0); + testing::ProcessTest("Testing ActivationLayer, padding", output0(0, 0, 0) == 0 && output0(0, 1, 0) == 0 && output0(2, 3, 1) == 0 && output0(3, 3, 1) == 0); +} + +template +void BatchNormalizationLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + using VectorType = typename Layer::VectorType; + + // Verify BatchNormailzationLayer + TensorType bnInput(2, 2, 2); + bnInput(0, 0, 0) = 11; + bnInput(0, 1, 0) = 7; + bnInput(1, 0, 1) = 30; + bnInput(1, 1, 1) = 50; + Shape bnOutputShape = { 4, 4, 2 }; + LayerParameters bnParameters{ bnInput, NoPadding(), bnOutputShape, ZeroPadding(1) }; + VectorType mean({ 5, 10 }); + VectorType variance({ 4.0, 16.0 }); + + BatchNormalizationLayer bnLayer(bnParameters, mean, variance, static_cast(1e-6), EpsilonSummand::SqrtVariance); + bnLayer.Compute(); + auto output1 = bnLayer.GetOutput(); + testing::ProcessTest("Testing BatchNormailzationLayer, values", Equals(output1(1, 1, 0), 3.0) && 
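// Review note: with epsilon = 1e-6 effectively negligible, the expected
// batch-norm values follow from (x - mean) / sqrt(variance):
// (11 - 5) / 2 = 3, (7 - 5) / 2 = 1, (30 - 10) / 4 = 5, (50 - 10) / 4 = 10.
// (Spelling nit: "BatchNormailzationLayer" in these comments and test names
// should read "BatchNormalizationLayer".)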
Equals(output1(1, 2, 0), 1.0) && Equals(output1(2, 1, 1), 5.0) && Equals(output1(2, 2, 1), 10.0)); + testing::ProcessTest("Testing BatchNormailzationLayer, padding", output1(0, 0, 0) == 0 && output1(0, 1, 0) == 0 && output1(2, 3, 1) == 0 && output1(3, 3, 1) == 0); +} + +template +void BiasLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + using VectorType = typename Layer::VectorType; + + // Verify BiasLayer + TensorType input(2, 2, 2); + input(0, 0, 0) = 1; + input(0, 1, 0) = 2; + input(1, 0, 1) = 3; + input(1, 1, 1) = 4; + Shape outputShape = { 4, 4, 2 }; + LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; + VectorType bias({ 5, 10 }); + + BiasLayer biasLayer(parameters, bias); + biasLayer.Compute(); + auto output = biasLayer.GetOutput(); + testing::ProcessTest("Testing BiasLayer, values", Equals(output(1, 1, 0), 6.0) && Equals(output(1, 2, 0), 7.0) && Equals(output(2, 1, 1), 13.0) && Equals(output(2, 2, 1), 14.0)); + testing::ProcessTest("Testing BiasLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 3, 1) == 0 && output(3, 3, 1) == 0); +} + +template +void InputLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using Shape = typename Layer::Shape; + + // Verify Input + Shape inputShape = { 2, 2, 2 }; + Shape outputShape = { 4, 4, 2 }; + typename InputLayer::InputParameters parameters{ inputShape, NoPadding(), outputShape, ZeroPadding(1), 2.0 }; + + InputLayer inputLayer(parameters); + inputLayer.SetInput(std::vector({ 1, 2, 3, 4, 5, 6, 7, 8 })); + inputLayer.Compute(); + auto output = inputLayer.GetOutput(); + testing::ProcessTest("Testing InputLayer, values", Equals(output(1, 1, 0), 2.0) && Equals(output(1, 2, 0), 6.0) && Equals(output(2, 1, 1), 12.0) && Equals(output(2, 2, 1), 16.0)); + testing::ProcessTest("Testing InputLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 3, 1) == 0 && output(3, 3, 1) == 0); +} + +template +void ScalingLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + using VectorType = typename Layer::VectorType; + + // Verify BiasLayer + TensorType input(2, 2, 2); + input(0, 0, 0) = 1; + input(0, 1, 0) = 2; + input(1, 0, 1) = 3; + input(1, 1, 1) = 4; + Shape outputShape = { 4, 4, 2 }; + LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; + VectorType scales({ 2, 0.5 }); + + ScalingLayer scalingLayer(parameters, scales); + scalingLayer.Compute(); + auto output = scalingLayer.GetOutput(); + testing::ProcessTest("Testing ScalingLayer, values", Equals(output(1, 1, 0), 2.0) && Equals(output(1, 2, 0), 4) && Equals(output(2, 1, 1), 1.5) && Equals(output(2, 2, 1), 2.0)); + testing::ProcessTest("Testing ScalingLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 3, 1) == 0 && output(3, 3, 1) == 0); +} + +template +void FullyConnectedLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + using MatrixType = typename Layer::MatrixType; + + // Verify 
FullyConnectedLayer + TensorType input(2, 2, 1); + input.Fill(1); + Shape outputShape = { 3, 5, 1 }; + LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; + MatrixType weights(3, 4); + weights(0, 0) = 1; + weights(0, 1) = 1; + weights(0, 2) = 1; + weights(0, 3) = 2; + weights(1, 0) = 1; + weights(1, 1) = 1; + weights(1, 2) = 1; + weights(1, 3) = 3; + weights(2, 0) = 1; + weights(2, 1) = 1; + weights(2, 2) = 1; + weights(2, 3) = 4; + + FullyConnectedLayer connectedLayer(parameters, weights); + connectedLayer.Compute(); + auto output = connectedLayer.GetOutput(); + testing::ProcessTest("Testing FullyConnectedLayer, values", Equals(output(1, 1, 0), 5.0) && Equals(output(1, 2, 0), 6.0) && Equals(output(1, 3, 0), 7.0)); + testing::ProcessTest("Testing FullyConnectedLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(1, 4, 0) == 0 && output(2, 4, 0) == 0); +} + +template +void PoolingLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + + // Verify PoolingLayer with no padding + TensorType input(4, 4, 2); + input.Fill(1); + input(1, 1, 0) = 10; + input(0, 2, 0) = 20; + input(2, 0, 0) = 30; + input(3, 3, 0) = 40; + input(1, 1, 1) = 11; + input(0, 2, 1) = 21; + input(2, 0, 1) = 31; + input(3, 3, 1) = 41; + Shape outputShape = { 4, 4, 2 }; + LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; + PoolingParameters poolingParams{ 2, 2 }; + PoolingLayer poolingLayer(parameters, poolingParams); + poolingLayer.Compute(); + auto output = poolingLayer.GetOutput(); + + testing::ProcessTest("Testing PoolingLayer, values", Equals(output(1, 1, 0), 10) && Equals(output(1, 2, 0), 20) && Equals(output(2, 1, 0), 30) && Equals(output(2, 2, 0), 40) && Equals(output(1, 1, 1), 11) && Equals(output(1, 2, 1), 21) && Equals(output(2, 1, 1), 31) && Equals(output(2, 2, 1), 41)); + testing::ProcessTest("Testing PoolingLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 3, 1) == 0 && output(3, 3, 1) == 0); + + // Verify PoolingLayer with padding + TensorType input2 // This input must include the padding + { + { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }, + { { 0, -1 }, { 5, 6 }, { 0, 0 }, { 20, 21 }, { 0, 0 }, { 0, 0 } }, + { { 0, 0 }, { -1, 0 }, { 10, 11 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }, + { { 0, 0 }, { 30, 31 }, { 0, 0 }, { 0, 0 }, { -1, 0 }, { 0, 0 } }, + { { 0, 0 }, { 0, 0 }, { 0, -5 }, { 0, 0 }, { 40, 41 }, { 0, 0 } }, + { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, -1 }, { 0, 0 }, { 0, 0 } }, + }; + TensorType expected2{ + { { 5, 6 }, { 20, 21 }, { 0, 0 } }, + { { 30, 31 }, { 10, 11 }, { 0, 0 } }, + { { 0, 0 }, { 0, 0 }, { 40, 41 } }, + }; + + Shape outputShape2 = { 3, 3, 2 }; + LayerParameters parameters2{ input2, ZeroPadding(1), outputShape2, NoPadding() }; + PoolingParameters poolingParams2{ 2, 2 }; + PoolingLayer poolingLayer2(parameters2, poolingParams2); + poolingLayer2.Compute(); + auto output2 = poolingLayer2.GetOutput(); + + testing::ProcessTest("Testing PoolingLayer with padding, values", output2.IsEqual(expected2)); +} + +template +void ConvolutionalLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + + // Verify ConvolutionalLayer 
with diagonal method + TensorType input(3, 4, 2); // Input includes padding --- 1 x 2 x 2 with 1 pixel of padding + input.Fill(0); + input(1, 1, 0) = 2; + input(1, 2, 0) = 1; + input(1, 1, 1) = 3; + input(1, 2, 1) = 2; + Shape outputShape = { 1, 2, 2 }; // Output has no padding: 1 x 2 x 2 + LayerParameters parameters{ input, ZeroPadding(1), outputShape, NoPadding() }; + ConvolutionalParameters convolutionalParams{ 3, 1, ConvolutionMethod::diagonal, 2 }; + + // Filter weights in `weightsVector` are in numFilters x numChannels x filterSize x filterSize order + // clang-format off + std::vector weightsVector { + 1, 3, 2, 3, 1, 1, 2, 3, 1, // Filter 1, channel 1 + 2, 4, 1, 3, 1, 2, 1, 4, 2, // Filter 1, channel 2 + + 1, 2, 1, 2, 3, 2, 1, 2, 1, // Filter 2, channel 1 + 0, 3, 2, 3, 1, 2, 1, 0, 2 }; // Filter 2, channel 2 + // clang-format on + + // Filter weights in `weights` tensor are in numFilters x filterSize x filterSize x numChannels order + TensorType weights(outputShape.NumChannels() * convolutionalParams.receptiveField, convolutionalParams.receptiveField, input.NumChannels()); + + size_t vectorIndex = 0; + for (size_t f = 0; f < outputShape.NumChannels(); f++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + for (size_t i = 0; i < convolutionalParams.receptiveField; i++) + { + for (size_t j = 0; j < convolutionalParams.receptiveField; j++) + { + weights(f * convolutionalParams.receptiveField + i, j, k) = weightsVector[vectorIndex++]; + } + } + } + } + + // Verify ConvolutionalLayer with simple method + convolutionalParams.method = ConvolutionMethod::simple; + ConvolutionalLayer convolutionalLayerSimple(parameters, convolutionalParams, weights); + convolutionalLayerSimple.Compute(); + auto outputSimple = convolutionalLayerSimple.GetOutput(); + testing::ProcessTest("Testing ConvolutionalLayer (simple), values", Equals(outputSimple(0, 0, 0), 10) && Equals(outputSimple(0, 0, 1), 15) && Equals(outputSimple(0, 1, 0), 18) && Equals(outputSimple(0, 1, 1), 18)); + + // Verify ConvolutionalLayer with unrolled method + convolutionalParams.method = ConvolutionMethod::unrolled; + ConvolutionalLayer convolutionalLayerUnrolled(parameters, convolutionalParams, weights); + convolutionalLayerUnrolled.Compute(); + auto outputUnrolled = convolutionalLayerUnrolled.GetOutput(); + testing::ProcessTest("Testing ConvolutionalLayer (unrolled), values", Equals(outputUnrolled(0, 0, 0), 10) && Equals(outputUnrolled(0, 0, 1), 15) && Equals(outputUnrolled(0, 1, 0), 18) && Equals(outputUnrolled(0, 1, 1), 18)); + + // Verify ConvolutionalLayer with diagonal method + convolutionalParams.method = ConvolutionMethod::diagonal; + ConvolutionalLayer convolutionalLayerDiagonal(parameters, convolutionalParams, weights); + convolutionalLayerDiagonal.Compute(); + auto outputDiagonal = convolutionalLayerDiagonal.GetOutput(); + testing::ProcessTest("Testing ConvolutionalLayer (diagonal), values", Equals(outputDiagonal(0, 0, 0), 10) && Equals(outputDiagonal(0, 0, 1), 15) && Equals(outputDiagonal(0, 1, 0), 18) && Equals(outputDiagonal(0, 1, 1), 18)); +} + +template +void BinaryConvolutionalLayerGemmTest(ell::predictors::neural::BinaryWeightsScale scale) +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; + + // Verify BinaryConvolutionalLayer with gemm method + TensorType 
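// Review note on the weight packing used in these convolution tests: the
// weights tensor stacks the filters vertically, so filter f, kernel row i,
// kernel column j, input channel k lands at
// weights(f * receptiveField + i, j, k); the copy loops transpose the flat
// numFilters x numChannels x rf x rf vector into that
// (numFilters * rf) x rf x numChannels layout.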
input(3, 4, 2); // Input includes padding + input.Fill(0); + input(1, 1, 0) = 2; + input(1, 2, 0) = 1; + input(1, 1, 1) = 3; + input(1, 2, 1) = 2; + Shape outputShape = { 1, 2, 2 }; // Output has no padding + LayerParameters parameters{ input.GetReference(), ZeroPadding(1), outputShape, NoPadding() }; + BinaryConvolutionalParameters convolutionalParams{ 3, 1, BinaryConvolutionMethod::gemm, scale }; + TensorType weights(convolutionalParams.receptiveField * outputShape.NumChannels(), convolutionalParams.receptiveField, input.NumChannels()); + // clang-format off + std::vector weightsVector{ // RowMajor then depth order + 1, 3, 2, 3, 1, 1, 2, 3, 1, + 2, 4, 1, 3, 1, 2, 1, 4, 2, + 1, 2, 1, 2, 3, 2, 1, 2, 1, + 0, 3, 2, 3, 1, 2, 1, 0, 2 }; + // clang-format on + size_t vectorIndex = 0; + for (size_t f = 0; f < outputShape.NumChannels(); f++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + for (size_t i = 0; i < convolutionalParams.receptiveField; i++) + { + for (size_t j = 0; j < convolutionalParams.receptiveField; j++) + { + weights(f * convolutionalParams.receptiveField + i, j, k) = weightsVector[vectorIndex++]; + } + } + } + } + + BinaryConvolutionalLayer convolutionalLayer(parameters, convolutionalParams, weights); + convolutionalLayer.Compute(); + auto output = convolutionalLayer.GetOutput(); + if (scale == ell::predictors::neural::BinaryWeightsScale::none) + { + testing::ProcessTest("Testing BinaryConvolutionalLayer (gemm) (no scaling), values", Equals(output(0, 0, 0), 4.0) && Equals(output(0, 0, 1), 4.0) && Equals(output(0, 1, 0), 4.0) && Equals(output(0, 1, 1), 4.0)); + } + else + { + testing::ProcessTest("Testing BinaryConvolutionalLayer (gemm) (no scaling), values", Equals(output(0, 0, 0), 8.22222) && Equals(output(0, 0, 1), 6.44444) && Equals(output(0, 1, 0), 8.22222) && Equals(output(0, 1, 1), 6.44444)); + } + + // Verify that we can archive and unarchive the layer + // Put the layer in a network so we can archive it + using InputParameters = typename InputLayer::InputParameters; + InputParameters inputParams = { { 1, 2, 2 }, { PaddingScheme::zeros, 0 }, { 3, 4, 2 }, { PaddingScheme::zeros, 0 }, 1 }; + auto inputLayer = std::make_unique>(inputParams); + typename NeuralNetworkPredictor::Layers layers; + layers.push_back(std::unique_ptr>(new BinaryConvolutionalLayer(parameters, convolutionalParams, weights))); + NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); + + // archive the network + utilities::SerializationContext context; + NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); + RegisterNodeTypes(context); + std::stringstream strstream; + utilities::JsonArchiver archiver(strstream); + archiver << neuralNetwork; + + // unarchive the network + utilities::JsonUnarchiver unarchiver(strstream, context); + NeuralNetworkPredictor archivedNetwork; + unarchiver >> archivedNetwork; + + auto archivedOutput = neuralNetwork.Predict(DataVectorType{ 2, 1, 3, 2 }); + if (scale == ell::predictors::neural::BinaryWeightsScale::none) + { + testing::ProcessTest("Testing archived BinaryConvolutionalLayer (gemm) (no scaling), values", Equals(archivedOutput[0], 4.0) && Equals(archivedOutput[1], 4.0) && Equals(archivedOutput[2], 4.0) && Equals(archivedOutput[3], 4.0)); + } + else + { + testing::ProcessTest("Testing archived BinaryConvolutionalLayer (gemm) (mean scaling), values", Equals(archivedOutput[0], 8.22222) && Equals(archivedOutput[1], 6.44444) && Equals(archivedOutput[2], 8.22222) && Equals(archivedOutput[3], 6.44444)); + } +} + 
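// Review notes on BinaryConvolutionalLayerGemmTest above: the mean-scaled
// expectations are consistent with output = (per-filter mean |w|) times the
// unscaled +/-1 correlation (4.0 everywhere): filter 1 gives 37/18 * 4
// ~= 8.22222 and filter 2 gives 29/18 * 4 ~= 6.44444. Two apparent slips:
// the else branch's message says "(no scaling)" although that branch is the
// mean-scaling case, and the "archived" checks call neuralNetwork.Predict
// rather than archivedNetwork.Predict, so the unarchived copy is never
// actually exercised.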
+template +void BinaryConvolutionalLayerGemmTest() +{ + BinaryConvolutionalLayerGemmTest(ell::predictors::neural::BinaryWeightsScale::mean); + BinaryConvolutionalLayerGemmTest(ell::predictors::neural::BinaryWeightsScale::none); +} + +template +void BinaryConvolutionalLayerBitwiseTest(ell::predictors::neural::BinaryWeightsScale scale) +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; + + // Verify BinaryConvolutionalLayer with gemm method + TensorType input(3, 4, 2); // Input includes padding + input.Fill(-1); + input(1, 1, 0) = 2; + input(1, 2, 0) = 1; + input(1, 1, 1) = 3; + input(1, 2, 1) = 2; + Shape outputShape = { 1, 2, 2 }; // Output has no padding + LayerParameters parameters{ input.GetReference(), MinusOnePadding(1), outputShape, NoPadding() }; + BinaryConvolutionalParameters convolutionalParams{ 3, 1, BinaryConvolutionMethod::gemm, scale }; + TensorType weights(convolutionalParams.receptiveField * outputShape.NumChannels(), convolutionalParams.receptiveField, input.NumChannels()); + // clang-format off + std::vector weightsVector{ // RowMajor then depth order + 1, 3, 2, 3, 1, 1, 2, 3, 1, + 2, 4, 1, 3, 1, 2, 1, 4, 2, + 1, 2, 1, 2, 3, 2, 1, 2, 1, + 0, 3, 2, 3, 1, 2, 1, 0, 2 }; + // clang-format on + size_t vectorIndex = 0; + for (size_t f = 0; f < outputShape.NumChannels(); f++) + { + for (size_t k = 0; k < input.NumChannels(); k++) + { + for (size_t i = 0; i < convolutionalParams.receptiveField; i++) + { + for (size_t j = 0; j < convolutionalParams.receptiveField; j++) + { + weights(f * convolutionalParams.receptiveField + i, j, k) = weightsVector[vectorIndex++]; + } + } + } + } + + // Verify BinaryConvolutionalLayer with bitwise method. Since we're doing bitwise operations, change the padding scheme to be zeros. 
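// Review note (assumption, not stated in the patch): the switch to zero
// padding for the bitwise path below is presumably because the packed-bit
// implementation represents padding as zero bits, so a minus-one padding
// scheme would not survive binarization.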
+ convolutionalParams.method = BinaryConvolutionMethod::bitwise; + parameters.inputPaddingParameters.paddingScheme = PaddingScheme::zeros; + input.Fill(0); + input(1, 1, 0) = 2; + input(1, 2, 0) = 1; + input(1, 1, 1) = 3; + input(1, 2, 1) = 2; + + BinaryConvolutionalLayer convolutionalLayer(parameters, convolutionalParams, weights); + convolutionalLayer.Compute(); + auto output = convolutionalLayer.GetOutput(); + if (scale == ell::predictors::neural::BinaryWeightsScale::none) + { + testing::ProcessTest("Testing BinaryConvolutionalLayer (bitwise) (mean scaling), values", Equals(output(0, 0, 0), 4.0) && Equals(output(0, 0, 1), 4.0) && Equals(output(0, 1, 0), 4.0) && Equals(output(0, 1, 1), 4.0)); + } + else + { + testing::ProcessTest("Testing BinaryConvolutionalLayer (bitwise) (no scaling), values", Equals(output(0, 0, 0), 8.22222) && Equals(output(0, 0, 1), 6.44444) && Equals(output(0, 1, 0), 8.22222) && Equals(output(0, 1, 1), 6.44444)); + } + + // Put the layer in a network so we can archive it + using InputParameters = typename InputLayer::InputParameters; + InputParameters inputParams = { { 1, 2, 2 }, { PaddingScheme::zeros, 0 }, { 3, 4, 2 }, { PaddingScheme::zeros, 0 }, 1 }; + auto inputLayer = std::make_unique>(inputParams); + typename NeuralNetworkPredictor::Layers layers; + layers.push_back(std::unique_ptr>(new BinaryConvolutionalLayer(parameters, convolutionalParams, weights))); + NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); + + // archive the network + utilities::SerializationContext context; + NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); + RegisterNodeTypes(context); + std::stringstream strstream; + utilities::JsonArchiver archiver(strstream); + archiver << neuralNetwork; + + // unarchive the network + utilities::JsonUnarchiver unarchiver(strstream, context); + NeuralNetworkPredictor archivedNetwork; + unarchiver >> archivedNetwork; + + auto archivedOutput = neuralNetwork.Predict(DataVectorType{ 2, 1, 3, 2 }); + if (scale == ell::predictors::neural::BinaryWeightsScale::none) + { + testing::ProcessTest("Testing archived BinaryConvolutionalLayer (bitwise) (no scaling), values", Equals(archivedOutput[0], 4.0) && Equals(archivedOutput[1], 4.0) && Equals(archivedOutput[2], 4.0) && Equals(archivedOutput[3], 4.0)); + } + else + { + testing::ProcessTest("Testing archived BinaryConvolutionalLayer (gemm) (mean scaling), values", Equals(archivedOutput[0], 8.22222) && Equals(archivedOutput[1], 6.44444) && Equals(archivedOutput[2], 8.22222) && Equals(archivedOutput[3], 6.44444)); + } +} + +template +void BinaryConvolutionalLayerBitwiseTest() +{ + BinaryConvolutionalLayerBitwiseTest(ell::predictors::neural::BinaryWeightsScale::mean); + BinaryConvolutionalLayerBitwiseTest(ell::predictors::neural::BinaryWeightsScale::none); +} + +template +void SoftmaxLayerTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using Shape = typename Layer::Shape; + + // Verify BiasLayer + TensorType input(1, 1, 3); + input(0, 0, 0) = 1; + input(0, 0, 1) = 2; + input(0, 0, 2) = 3; + Shape outputShape = { 3, 3, 3 }; + LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; + + SoftmaxLayer softmaxLayer(parameters); + softmaxLayer.Compute(); + auto output = softmaxLayer.GetOutput(); + testing::ProcessTest("Testing SoftmaxLayer, values", Equals(output(1, 1, 0), 0.0900305733) && Equals(output(1, 
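// Review note: for inputs (1, 2, 3), softmax yields e^1 : e^2 : e^3
// normalized, i.e. 0.09003, 0.24473, 0.66524, matching the expected values
// in this check (and the "// Verify BiasLayer" comment above this block
// should say SoftmaxLayer). In the bitwise test above, the "(mean scaling)"
// and "(no scaling)" message labels are swapped relative to their branches,
// and the final archived message says "(gemm)" where "(bitwise)" is meant.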
1, 1), 0.244728476) && Equals(output(1, 1, 2), 0.665240943)); + testing::ProcessTest("Testing SoftmaxLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 2, 0) == 0 && output(2, 2, 1) == 0); +} + +template +void NeuralNetworkPredictorTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using InputParameters = typename InputLayer::InputParameters; + using LayerParameters = typename Layer::LayerParameters; + using VectorType = typename Layer::VectorType; + using MatrixType = typename Layer::MatrixType; + using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; + + // Build an XOR net from previously trained values. + typename NeuralNetworkPredictor::InputLayerReference inputLayer; + typename NeuralNetworkPredictor::Layers layers; + + InputParameters inputParams = { { 1, 1, 2 }, { PaddingScheme::zeros, 0 }, { 1, 1, 2 }, { PaddingScheme::zeros, 0 }, 1 }; + inputLayer = std::make_unique>(inputParams); + + LayerParameters layerParameters{ inputLayer->GetOutput(), NoPadding(), { 1, 1, 3 }, NoPadding() }; + MatrixType weights1(3, 2); + weights1(0, 0) = -0.97461396f; + weights1(0, 1) = 1.40845299f; + weights1(1, 0) = -0.14135513f; + weights1(1, 1) = -0.54136097f; + weights1(2, 0) = 0.99313086f; + weights1(2, 1) = -0.99083692f; + layers.push_back(std::unique_ptr>(new FullyConnectedLayer(layerParameters, weights1))); + + layerParameters = { layers[0]->GetOutput(), NoPadding(), { 1, 1, 3 }, NoPadding() }; + VectorType bias1({ -0.43837756f, -0.90868396f, -0.0323102f }); + layers.push_back(std::unique_ptr>(new BiasLayer(layerParameters, bias1))); + + layerParameters = { layers[1]->GetOutput(), NoPadding(), { 1, 1, 3 }, NoPadding() }; + layers.push_back(std::unique_ptr>(new ActivationLayer(layerParameters, new ReLUActivation()))); + + layerParameters = { layers[2]->GetOutput(), NoPadding(), { 1, 1, 1 }, NoPadding() }; + MatrixType weights2(1, 3); + weights2(0, 0) = 1.03084767f; + weights2(0, 1) = -0.10772263f; + weights2(0, 2) = 1.04077697f; + layers.push_back(std::unique_ptr>(new FullyConnectedLayer(layerParameters, weights2))); + + layerParameters = { layers[3]->GetOutput(), NoPadding(), { 1, 1, 1 }, NoPadding() }; + VectorType bias2({ 1.40129846e-20f }); + layers.push_back(std::unique_ptr>(new BiasLayer(layerParameters, bias2))); + + NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); + std::vector output; + + // Check the result for the 4 permutations of input. This validates that: + // - the weights loaded correctly. 
+ // - the operations in each layer are working correctly + // - the feed forward logic is working correctly + + output = neuralNetwork.Predict(DataVectorType({ 0, 0 })); + testing::ProcessTest("Testing NeuralNetworkPredictor, Predict of XOR net for 0 0 ", Equals(output[0], 0.0)); + + output = neuralNetwork.Predict(DataVectorType({ 0, 1 })); + testing::ProcessTest("Testing NeuralNetworkPredictor, Predict of XOR net for 0 1 ", Equals(output[0], 1.0)); + + output = neuralNetwork.Predict(DataVectorType({ 1, 0 })); + testing::ProcessTest("Testing NeuralNetworkPredictor, Predict of XOR net for 1 0 ", Equals(output[0], 1.0)); + + output = neuralNetwork.Predict(DataVectorType({ 1, 1 })); + testing::ProcessTest("Testing NeuralNetworkPredictor, Predict of XOR net for 1 1 ", Equals(output[0], 0.0)); + + // Verify that we can archive and unarchive the predictor + utilities::SerializationContext context; + NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); + RegisterNodeTypes(context); + std::stringstream strstream; + utilities::JsonArchiver archiver(strstream); + archiver << neuralNetwork; + utilities::JsonUnarchiver unarchiver(strstream, context); + + NeuralNetworkPredictor neuralNetwork2; + unarchiver >> neuralNetwork2; + + output = neuralNetwork2.Predict(DataVectorType({ 0, 0 })); + testing::ProcessTest("Testing NeuralNetworkPredictor from archive, Predict of XOR net for 0 0 ", Equals(output[0], 0.0)); + + output = neuralNetwork2.Predict(DataVectorType({ 0, 1 })); + testing::ProcessTest("Testing NeuralNetworkPredictor from archive, Predict of XOR net for 0 1 ", Equals(output[0], 1.0)); + + output = neuralNetwork2.Predict(DataVectorType({ 1, 0 })); + testing::ProcessTest("Testing NeuralNetworkPredictor from archive, Predict of XOR net for 1 0 ", Equals(output[0], 1.0)); + + output = neuralNetwork2.Predict(DataVectorType({ 1, 1 })); + testing::ProcessTest("Testing NeuralNetworkPredictor from archive, Predict of XOR net for 1 1 ", Equals(output[0], 0.0)); + + // Remove the last 2 layers, (Dense and Bias) + neuralNetwork2.RemoveLastLayers(2); + output = neuralNetwork2.Predict(DataVectorType({ 0, 1 })); + testing::ProcessTest("Testing cut NeuralNetworkPredictor, predict for 0 1 ", Equals(output[0], 0.970072031) && Equals(output[1], 0.0) && Equals(output[2], 0.0)); +} + +template +void FillTensor(ell::math::ChannelColumnRowTensor& tensor, int startValue = 0) +{ + int val = startValue; + tensor.Generate([&val]() { return val++; }); +} + +template +void FillVector(ell::math::ColumnVector& vector, int startValue = 0) +{ + int val = startValue; + vector.Generate([&val]() { return val++; }); +} + +template +void ConvolutionalArchiveTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using InputParameters = typename InputLayer::InputParameters; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; + + // Build a net + typename NeuralNetworkPredictor::InputLayerReference inputLayer; + typename NeuralNetworkPredictor::Layers layers; + + InputParameters inputParams = { { 3, 3, 3 }, { PaddingScheme::zeros, 0 }, { 5, 5, 3 }, { PaddingScheme::zeros, 1 }, 1 }; + inputLayer = std::make_unique>(inputParams); + + LayerParameters layerParameters{ inputLayer->GetOutput(), { PaddingScheme::zeros, 1 }, { 3, 3, 8 }, NoPadding() }; + auto convolutionMethod = ConvolutionMethod::unrolled; + ConvolutionalParameters convolutionalParams{ 3, 1, 
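// Review note: after RemoveLastLayers(2) above, the network ends at the
// ReLU activation layer, so Predict returns that layer's three activations,
// which is why the cut-network check expects three components. FillTensor
// and FillVector above use Generate with an incrementing counter to produce
// deterministic test weights for the archive round-trip tests below.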
convolutionMethod, 1 }; + TensorType convWeights1(8 * 3, 3, 3); + FillTensor(convWeights1); + layers.push_back(std::unique_ptr>(new ConvolutionalLayer(layerParameters, convolutionalParams, convWeights1))); + + NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); + std::vector input(3 * 3 * 3); + int val = 0; + std::generate(input.begin(), input.end(), [&val]() { return val++; }); + + utilities::SerializationContext context; + NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); + RegisterNodeTypes(context); + std::stringstream strstream; + utilities::JsonArchiver archiver(strstream); + archiver << neuralNetwork; + + utilities::JsonUnarchiver unarchiver(strstream, context); + NeuralNetworkPredictor neuralNetwork2; + unarchiver >> neuralNetwork2; + auto output = neuralNetwork.Predict(DataVectorType(input)); + auto output2 = neuralNetwork2.Predict(DataVectorType(input)); + testing::ProcessTest("Testing Convolutional predictor from archive", testing::IsEqual(output, output2)); +} + +template +void BinaryConvolutionalArchiveTest() +{ + using namespace ell::predictors; + using namespace ell::predictors::neural; + using InputParameters = typename InputLayer::InputParameters; + using LayerParameters = typename Layer::LayerParameters; + using TensorType = typename Layer::TensorType; + using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; + + // Build a net + typename NeuralNetworkPredictor::InputLayerReference inputLayer; + typename NeuralNetworkPredictor::Layers layers; + + InputParameters inputParams = { { 3, 3, 3 }, { PaddingScheme::zeros, 0 }, { 5, 5, 3 }, { PaddingScheme::zeros, 1 }, 1 }; + inputLayer = std::make_unique>(inputParams); + + LayerParameters layerParameters{ inputLayer->GetOutput(), { PaddingScheme::zeros, 1 }, { 3, 3, 8 }, NoPadding() }; + BinaryConvolutionalParameters convolutionalParams{ 3, 1, BinaryConvolutionMethod::bitwise, BinaryWeightsScale::mean }; + TensorType convWeights1(8 * 3, 3, 3); + FillTensor(convWeights1); + layers.push_back(std::unique_ptr>(new BinaryConvolutionalLayer(layerParameters, convolutionalParams, convWeights1))); + + NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); + std::vector input(3 * 3 * 3); + int val = 0; + std::generate(input.begin(), input.end(), [&val]() { return val++; }); + + utilities::SerializationContext context; + NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); + RegisterNodeTypes(context); + std::stringstream strstream; + utilities::JsonArchiver archiver(strstream); + archiver << neuralNetwork; + + utilities::JsonUnarchiver unarchiver(strstream, context); + NeuralNetworkPredictor neuralNetwork2; + unarchiver >> neuralNetwork2; + auto output = neuralNetwork.Predict(DataVectorType(input)); + auto output2 = neuralNetwork2.Predict(DataVectorType(input)); + testing::ProcessTest("Testing Binary convolutional predictor from archive", testing::IsEqual(output, output2)); +} + +#pragma endregion implementation diff --git a/libraries/predictors/test/tcc/LinearPredictorTests.tcc b/libraries/predictors/test/tcc/LinearPredictorTests.tcc deleted file mode 100644 index b65b56e3b..000000000 --- a/libraries/predictors/test/tcc/LinearPredictorTests.tcc +++ /dev/null @@ -1,31 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: LinearPredictorTests.tcc (predictors_test) -// Authors: Byron Changuion -// 
-//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -#include - -using namespace ell; - -template -void LinearPredictorTest() -{ - // test that default constructor works. - predictors::LinearPredictor predictor0; - testing::ProcessTest("DefaultLinearPredictor", testing::IsEqual(predictor0.GetBias(), static_cast(0))); - - // now test a known predictor. - using DataVectorType = typename predictors::LinearPredictor::DataVectorType; - math::ColumnVector weights({ 1, 2, 3, 4, 5 }); - ElementType bias = 1.5; - - predictors::LinearPredictor predictor(weights, bias); - auto result = predictor.Predict(DataVectorType{ 1.0, 2.0, 1.0, -1.0, 0.5 }); - - testing::ProcessTest("TestLinearPredictor", testing::IsEqual(result, static_cast(8.0))); -} diff --git a/libraries/predictors/test/tcc/NeuralNetworkPredictorTests.tcc b/libraries/predictors/test/tcc/NeuralNetworkPredictorTests.tcc deleted file mode 100644 index 4c89202df..000000000 --- a/libraries/predictors/test/tcc/NeuralNetworkPredictorTests.tcc +++ /dev/null @@ -1,849 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: NeuralNetworkPredictorTests.tcc (predictors_test) -// Authors: Byron Changuion -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -using namespace ell; -using namespace ell::common; - -inline bool Equals(double a, double b) -{ - if (std::abs(a - b) < 0.0001) - return true; - return false; -} - -template -void ActivationTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using TensorType = typename Layer::TensorType; - - TensorType T0(2, 2, 2); - T0(0, 0, 0) = static_cast(1.0); - T0(0, 1, 0) = static_cast(-2.0); - T0(1, 0, 1) = static_cast(3.0); - T0(1, 1, 1) = static_cast(-4.0); - - TensorType T1(2, 2, 2); - - auto hardSigmoid = HardSigmoidActivation(); - for (size_t i = 0; i < T0.NumRows(); ++i) - { - for (size_t j = 0; j < T0.NumColumns(); ++j) - { - for (size_t k = 0; k < T0.NumChannels(); ++k) - { - T1(i, j, k) = hardSigmoid.ApplyIndex(T0(i, j, k), { i, j, k }); - } - } - } - testing::ProcessTest("Testing HardSigmoidActivation", Equals(T1(0, 0, 0), 0.7) && Equals(T1(0, 1, 0), 0.1) && T1(1, 0, 1) == 1 && T1(1, 1, 1) == 0 && T1(0, 0, 1) == 0.5 && T1(0, 1, 1) == 0.5 && T1(1, 0, 0) == 0.5 && T1(1, 1, 0) == 0.5); - - auto relu = ReLUActivation(); - for (size_t i = 0; i < T0.NumRows(); ++i) - { - for (size_t j = 0; j < T0.NumColumns(); ++j) - { - for (size_t k = 0; k < T0.NumChannels(); ++k) - { - T1(i, j, k) = relu.ApplyIndex(T0(i, j, k), { i, j, k }); - } - } - } - testing::ProcessTest("Testing ReLUActivation", T1(0, 0, 0) == 1.0 && T1(0, 1, 0) == 0 && T1(1, 0, 1) == 3.0 && T1(1, 1, 1) == 0); - - auto leakyRelu = LeakyReLUActivation(static_cast(0.1)); - for (size_t i = 0; i < T0.NumRows(); ++i) - { - for (size_t j = 0; j < T0.NumColumns(); ++j) - { - for (size_t k = 0; k < T0.NumChannels(); ++k) - { - T1(i, j, k) = leakyRelu.ApplyIndex(T0(i, j, k), { i, j, k }); - } - } - } - testing::ProcessTest("Testing LeakyReLUActivation", Equals(T1(0, 0, 0), 1.0) && Equals(T1(0, 1, 0), -0.2) && Equals(T1(1, 0, 1), 3.0) && Equals(T1(1, 1, 1), -0.4)); - - TensorType alpha(2, 2, 2); - alpha(0, 0, 0) = static_cast(0.1); - alpha(0, 1, 0) = 
static_cast(0.2); - alpha(1, 0, 1) = static_cast(0.3); - alpha(1, 1, 1) = static_cast(0.4); - - auto parametricRelu = ParametricReLUActivation(alpha); - for (size_t i = 0; i < T0.NumRows(); ++i) - { - for (size_t j = 0; j < T0.NumColumns(); ++j) - { - for (size_t k = 0; k < T0.NumChannels(); ++k) - { - T1(i, j, k) = parametricRelu.ApplyIndex(T0(i, j, k), { i, j, k }); - } - } - } - testing::ProcessTest("Testing ParametricReLUActivation", Equals(T1(0, 0, 0), 1.0) && Equals(T1(0, 1, 0), -0.4) && Equals(T1(1, 0, 1), 3.0) && Equals(T1(1, 1, 1), -1.6)); - - auto sigmoid = SigmoidActivation(); - for (size_t i = 0; i < T0.NumRows(); ++i) - { - for (size_t j = 0; j < T0.NumColumns(); ++j) - { - for (size_t k = 0; k < T0.NumChannels(); ++k) - { - T1(i, j, k) = sigmoid.ApplyIndex(T0(i, j, k), { i, j, k }); - } - } - } - testing::ProcessTest("Testing SigmoidActivation", Equals(T1(0, 0, 0), 0.73106) && Equals(T1(0, 1, 0), 0.11920) && Equals(T1(1, 0, 1), 0.95257) && Equals(T1(1, 1, 1), 0.017986)); -} - -template -void LayerBaseTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - - // Verify LayerBase - TensorType input0(12, 12, 3); - PaddingParameters paddingParameters2{ PaddingScheme::alternatingZeroAndOnes, 1 }; - Shape outputShape = { 12, 12, 6 }; - LayerParameters layerParameters{ input0, ZeroPadding(1), outputShape, paddingParameters2 }; - - Layer baseLayer(layerParameters); - auto layerBaseOutput = baseLayer.GetOutput(); - testing::ProcessTest("Testing LayerBase, output tensor", layerBaseOutput.NumRows() == 12 && layerBaseOutput.NumColumns() == 12 && layerBaseOutput.NumChannels() == 6); - testing::ProcessTest("Testing LayerBase, output tensor padding values", layerBaseOutput(0, 0, 0) == 0 && layerBaseOutput(0, 1, 0) == 1 && layerBaseOutput(0, 2, 0) == 0 && layerBaseOutput(0, 3, 0) == 1); -} - -template -void ActivationLayerTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - - // Verify ActivationLayer - TensorType activationInput(2, 2, 2); - activationInput(0, 0, 0) = 1.0; - activationInput(0, 1, 0) = -2.0; - activationInput(1, 0, 1) = 3.0; - activationInput(1, 1, 1) = -4.0; - Shape activationOutputShape = { 4, 4, 2 }; - LayerParameters activationParameters{ activationInput, NoPadding(), activationOutputShape, ZeroPadding(1) }; - - ActivationLayer activationLayer(activationParameters, new ReLUActivation()); - activationLayer.Compute(); - auto output0 = activationLayer.GetOutput(); - testing::ProcessTest("Testing ActivationLayer, values", output0(1, 1, 0) == 1.0 && output0(1, 2, 0) == 0 && output0(2, 1, 1) == 3.0 && output0(2, 2, 1) == 0); - testing::ProcessTest("Testing ActivationLayer, padding", output0(0, 0, 0) == 0 && output0(0, 1, 0) == 0 && output0(2, 3, 1) == 0 && output0(3, 3, 1) == 0); -} - -template -void BatchNormalizationLayerTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - using VectorType = typename Layer::VectorType; - - // Verify BatchNormailzationLayer - TensorType bnInput(2, 2, 2); - bnInput(0, 0, 0) = 11; - bnInput(0, 1, 0) = 7; - 
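// For reference (derived from the constants in this test, not from the original source):
// with EpsilonSummand::SqrtVariance the layer computes (x - mean) / (sqrt(variance) + epsilon),
// and with epsilon at 1e-6 the epsilon term is negligible here. Given mean { 5, 10 } and
// variance { 4, 16 }, the expected interior outputs are
//   (11 - 5) / 2 = 3,  (7 - 5) / 2 = 1,  (30 - 10) / 4 = 5,  (50 - 10) / 4 = 10,
// which is exactly what the Equals checks below assert.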
bnInput(1, 0, 1) = 30; - bnInput(1, 1, 1) = 50; - Shape bnOutputShape = { 4, 4, 2 }; - LayerParameters bnParameters{ bnInput, NoPadding(), bnOutputShape, ZeroPadding(1) }; - VectorType mean({ 5, 10 }); - VectorType variance({ 4.0, 16.0 }); - - BatchNormalizationLayer bnLayer(bnParameters, mean, variance, static_cast(1e-6), EpsilonSummand::SqrtVariance); - bnLayer.Compute(); - auto output1 = bnLayer.GetOutput(); - testing::ProcessTest("Testing BatchNormailzationLayer, values", Equals(output1(1, 1, 0), 3.0) && Equals(output1(1, 2, 0), 1.0) && Equals(output1(2, 1, 1), 5.0) && Equals(output1(2, 2, 1), 10.0)); - testing::ProcessTest("Testing BatchNormailzationLayer, padding", output1(0, 0, 0) == 0 && output1(0, 1, 0) == 0 && output1(2, 3, 1) == 0 && output1(3, 3, 1) == 0); -} - -template -void BiasLayerTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - using VectorType = typename Layer::VectorType; - - // Verify BiasLayer - TensorType input(2, 2, 2); - input(0, 0, 0) = 1; - input(0, 1, 0) = 2; - input(1, 0, 1) = 3; - input(1, 1, 1) = 4; - Shape outputShape = { 4, 4, 2 }; - LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; - VectorType bias({ 5, 10 }); - - BiasLayer biasLayer(parameters, bias); - biasLayer.Compute(); - auto output = biasLayer.GetOutput(); - testing::ProcessTest("Testing BiasLayer, values", Equals(output(1, 1, 0), 6.0) && Equals(output(1, 2, 0), 7.0) && Equals(output(2, 1, 1), 13.0) && Equals(output(2, 2, 1), 14.0)); - testing::ProcessTest("Testing BiasLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 3, 1) == 0 && output(3, 3, 1) == 0); -} - -template -void InputLayerTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using Shape = typename Layer::Shape; - - // Verify Input - Shape inputShape = { 2, 2, 2 }; - Shape outputShape = { 4, 4, 2 }; - typename InputLayer::InputParameters parameters{ inputShape, NoPadding(), outputShape, ZeroPadding(1), 2.0 }; - - InputLayer inputLayer(parameters); - inputLayer.SetInput(std::vector({ 1, 2, 3, 4, 5, 6, 7, 8 })); - inputLayer.Compute(); - auto output = inputLayer.GetOutput(); - testing::ProcessTest("Testing InputLayer, values", Equals(output(1, 1, 0), 2.0) && Equals(output(1, 2, 0), 6.0) && Equals(output(2, 1, 1), 12.0) && Equals(output(2, 2, 1), 16.0)); - testing::ProcessTest("Testing InputLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 3, 1) == 0 && output(3, 3, 1) == 0); -} - -template -void ScalingLayerTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - using VectorType = typename Layer::VectorType; - - // Verify BiasLayer - TensorType input(2, 2, 2); - input(0, 0, 0) = 1; - input(0, 1, 0) = 2; - input(1, 0, 1) = 3; - input(1, 1, 1) = 4; - Shape outputShape = { 4, 4, 2 }; - LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; - VectorType scales({ 2, 0.5 }); - - ScalingLayer scalingLayer(parameters, scales); - scalingLayer.Compute(); - auto output = scalingLayer.GetOutput(); - testing::ProcessTest("Testing ScalingLayer, values", Equals(output(1, 1, 0), 2.0) && Equals(output(1, 2, 0), 4) && Equals(output(2, 1, 1), 1.5) && 
Equals(output(2, 2, 1), 2.0)); - testing::ProcessTest("Testing ScalingLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 3, 1) == 0 && output(3, 3, 1) == 0); -} - -template -void FullyConnectedLayerTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - using MatrixType = typename Layer::MatrixType; - - // Verify FullyConnectedLayer - TensorType input(2, 2, 1); - input.Fill(1); - Shape outputShape = { 3, 5, 1 }; - LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; - MatrixType weights(3, 4); - weights(0, 0) = 1; - weights(0, 1) = 1; - weights(0, 2) = 1; - weights(0, 3) = 2; - weights(1, 0) = 1; - weights(1, 1) = 1; - weights(1, 2) = 1; - weights(1, 3) = 3; - weights(2, 0) = 1; - weights(2, 1) = 1; - weights(2, 2) = 1; - weights(2, 3) = 4; - - FullyConnectedLayer connectedLayer(parameters, weights); - connectedLayer.Compute(); - auto output = connectedLayer.GetOutput(); - testing::ProcessTest("Testing FullyConnectedLayer, values", Equals(output(1, 1, 0), 5.0) && Equals(output(1, 2, 0), 6.0) && Equals(output(1, 3, 0), 7.0)); - testing::ProcessTest("Testing FullyConnectedLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(1, 4, 0) == 0 && output(2, 4, 0) == 0); -} - -template -void PoolingLayerTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - - // Verify PoolingLayer with no padding - TensorType input(4, 4, 2); - input.Fill(1); - input(1, 1, 0) = 10; - input(0, 2, 0) = 20; - input(2, 0, 0) = 30; - input(3, 3, 0) = 40; - input(1, 1, 1) = 11; - input(0, 2, 1) = 21; - input(2, 0, 1) = 31; - input(3, 3, 1) = 41; - Shape outputShape = { 4, 4, 2 }; - LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) }; - PoolingParameters poolingParams{ 2, 2 }; - PoolingLayer poolingLayer(parameters, poolingParams); - poolingLayer.Compute(); - auto output = poolingLayer.GetOutput(); - - testing::ProcessTest("Testing PoolingLayer, values", Equals(output(1, 1, 0), 10) && Equals(output(1, 2, 0), 20) && Equals(output(2, 1, 0), 30) && Equals(output(2, 2, 0), 40) && Equals(output(1, 1, 1), 11) && Equals(output(1, 2, 1), 21) && Equals(output(2, 1, 1), 31) && Equals(output(2, 2, 1), 41)); - testing::ProcessTest("Testing PoolingLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 3, 1) == 0 && output(3, 3, 1) == 0); - - // Verify PoolingLayer with padding - TensorType input2 // This input must include the padding - { - { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }, - { { 0, -1 }, { 5, 6 }, { 0, 0 }, { 20, 21 }, { 0, 0 }, { 0, 0 } }, - { { 0, 0 }, { -1, 0 }, { 10, 11 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }, - { { 0, 0 }, { 30, 31 }, { 0, 0 }, { 0, 0 }, { -1, 0 }, { 0, 0 } }, - { { 0, 0 }, { 0, 0 }, { 0, -5 }, { 0, 0 }, { 40, 41 }, { 0, 0 } }, - { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, -1 }, { 0, 0 }, { 0, 0 } }, - }; - TensorType expected2{ - { { 5, 6 }, { 20, 21 }, { 0, 0 } }, - { { 30, 31 }, { 10, 11 }, { 0, 0 } }, - { { 0, 0 }, { 0, 0 }, { 40, 41 } }, - }; - - Shape outputShape2 = { 3, 3, 2 }; - LayerParameters parameters2{ input2, ZeroPadding(1), outputShape2, NoPadding() }; - PoolingParameters poolingParams2{ 2, 2 }; - PoolingLayer 
poolingLayer2(parameters2, poolingParams2); - poolingLayer2.Compute(); - auto output2 = poolingLayer2.GetOutput(); - - testing::ProcessTest("Testing PoolingLayer with padding, values", output2.IsEqual(expected2)); -} - -template -void ConvolutionalLayerTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - - // Verify ConvolutionalLayer with diagonal method - TensorType input(3, 4, 2); // Input includes padding --- 1 x 2 x 2 with 1 pixel of padding - input.Fill(0); - input(1, 1, 0) = 2; - input(1, 2, 0) = 1; - input(1, 1, 1) = 3; - input(1, 2, 1) = 2; - Shape outputShape = { 1, 2, 2 }; // Output has no padding: 1 x 2 x 2 - LayerParameters parameters{ input, ZeroPadding(1), outputShape, NoPadding() }; - ConvolutionalParameters convolutionalParams{ 3, 1, ConvolutionMethod::diagonal, 2 }; - - // Filter weights in `weightsVector` are in numFilters x numChannels x filterSize x filterSize order - // clang-format off - std::vector weightsVector { - 1, 3, 2, 3, 1, 1, 2, 3, 1, // Filter 1, channel 1 - 2, 4, 1, 3, 1, 2, 1, 4, 2, // Filter 1, channel 2 - - 1, 2, 1, 2, 3, 2, 1, 2, 1, // Filter 2, channel 1 - 0, 3, 2, 3, 1, 2, 1, 0, 2 }; // Filter 2, channel 2 - // clang-format on - - // Filter weights in `weights` tensor are in numFilters x filterSize x filterSize x numChannels order - TensorType weights(outputShape.NumChannels() * convolutionalParams.receptiveField, convolutionalParams.receptiveField, input.NumChannels()); - - size_t vectorIndex = 0; - for (size_t f = 0; f < outputShape.NumChannels(); f++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - for (size_t i = 0; i < convolutionalParams.receptiveField; i++) - { - for (size_t j = 0; j < convolutionalParams.receptiveField; j++) - { - weights(f * convolutionalParams.receptiveField + i, j, k) = weightsVector[vectorIndex++]; - } - } - } - } - - // Verify ConvolutionalLayer with simple method - convolutionalParams.method = ConvolutionMethod::simple; - ConvolutionalLayer convolutionalLayerSimple(parameters, convolutionalParams, weights); - convolutionalLayerSimple.Compute(); - auto outputSimple = convolutionalLayerSimple.GetOutput(); - testing::ProcessTest("Testing ConvolutionalLayer (simple), values", Equals(outputSimple(0, 0, 0), 10) && Equals(outputSimple(0, 0, 1), 15) && Equals(outputSimple(0, 1, 0), 18) && Equals(outputSimple(0, 1, 1), 18)); - - // Verify ConvolutionalLayer with unrolled method - convolutionalParams.method = ConvolutionMethod::unrolled; - ConvolutionalLayer convolutionalLayerUnrolled(parameters, convolutionalParams, weights); - convolutionalLayerUnrolled.Compute(); - auto outputUnrolled = convolutionalLayerUnrolled.GetOutput(); - testing::ProcessTest("Testing ConvolutionalLayer (unrolled), values", Equals(outputUnrolled(0, 0, 0), 10) && Equals(outputUnrolled(0, 0, 1), 15) && Equals(outputUnrolled(0, 1, 0), 18) && Equals(outputUnrolled(0, 1, 1), 18)); - - // Verify ConvolutionalLayer with diagonal method - convolutionalParams.method = ConvolutionMethod::diagonal; - ConvolutionalLayer convolutionalLayerDiagonal(parameters, convolutionalParams, weights); - convolutionalLayerDiagonal.Compute(); - auto outputDiagonal = convolutionalLayerDiagonal.GetOutput(); - testing::ProcessTest("Testing ConvolutionalLayer (diagonal), values", Equals(outputDiagonal(0, 0, 0), 10) && Equals(outputDiagonal(0, 0, 1), 15) && Equals(outputDiagonal(0, 1, 0), 18) 
&& Equals(outputDiagonal(0, 1, 1), 18)); -} - -template -void BinaryConvolutionalLayerGemmTest(ell::predictors::neural::BinaryWeightsScale scale) -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; - - // Verify BinaryConvolutionalLayer with gemm method - TensorType input(3, 4, 2); // Input includes padding - input.Fill(0); - input(1, 1, 0) = 2; - input(1, 2, 0) = 1; - input(1, 1, 1) = 3; - input(1, 2, 1) = 2; - Shape outputShape = { 1, 2, 2 }; // Output has no padding - LayerParameters parameters{ input.GetReference(), ZeroPadding(1), outputShape, NoPadding() }; - BinaryConvolutionalParameters convolutionalParams{ 3, 1, BinaryConvolutionMethod::gemm, scale }; - TensorType weights(convolutionalParams.receptiveField * outputShape.NumChannels(), convolutionalParams.receptiveField, input.NumChannels()); - // clang-format off - std::vector weightsVector{ // RowMajor then depth order - 1, 3, 2, 3, 1, 1, 2, 3, 1, - 2, 4, 1, 3, 1, 2, 1, 4, 2, - 1, 2, 1, 2, 3, 2, 1, 2, 1, - 0, 3, 2, 3, 1, 2, 1, 0, 2 }; - // clang-format on - size_t vectorIndex = 0; - for (size_t f = 0; f < outputShape.NumChannels(); f++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - for (size_t i = 0; i < convolutionalParams.receptiveField; i++) - { - for (size_t j = 0; j < convolutionalParams.receptiveField; j++) - { - weights(f * convolutionalParams.receptiveField + i, j, k) = weightsVector[vectorIndex++]; - } - } - } - } - - BinaryConvolutionalLayer convolutionalLayer(parameters, convolutionalParams, weights); - convolutionalLayer.Compute(); - auto output = convolutionalLayer.GetOutput(); - if (scale == ell::predictors::neural::BinaryWeightsScale::none) - { - testing::ProcessTest("Testing BinaryConvolutionalLayer (gemm) (no scaling), values", Equals(output(0, 0, 0), 4.0) && Equals(output(0, 0, 1), 4.0) && Equals(output(0, 1, 0), 4.0) && Equals(output(0, 1, 1), 4.0)); - } - else - { - testing::ProcessTest("Testing BinaryConvolutionalLayer (gemm) (no scaling), values", Equals(output(0, 0, 0), 8.22222) && Equals(output(0, 0, 1), 6.44444) && Equals(output(0, 1, 0), 8.22222) && Equals(output(0, 1, 1), 6.44444)); - } - - // Verify that we can archive and unarchive the layer - // Put the layer in a network so we can archive it - using InputParameters = typename InputLayer::InputParameters; - InputParameters inputParams = { { 1, 2, 2 }, { PaddingScheme::zeros, 0 }, { 3, 4, 2 }, { PaddingScheme::zeros, 0 }, 1 }; - auto inputLayer = std::make_unique>(inputParams); - typename NeuralNetworkPredictor::Layers layers; - layers.push_back(std::unique_ptr>(new BinaryConvolutionalLayer(parameters, convolutionalParams, weights))); - NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); - - // archive the network - utilities::SerializationContext context; - NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); - RegisterNodeTypes(context); - std::stringstream strstream; - utilities::JsonArchiver archiver(strstream); - archiver << neuralNetwork; - - // unarchive the network - utilities::JsonUnarchiver unarchiver(strstream, context); - NeuralNetworkPredictor archivedNetwork; - unarchiver >> archivedNetwork; - - auto archivedOutput = neuralNetwork.Predict(DataVectorType{ 2, 1, 3, 2 }); - if (scale == ell::predictors::neural::BinaryWeightsScale::none) 
- { - testing::ProcessTest("Testing archived BinaryConvolutionalLayer (gemm) (no scaling), values", Equals(archivedOutput[0], 4.0) && Equals(archivedOutput[1], 4.0) && Equals(archivedOutput[2], 4.0) && Equals(archivedOutput[3], 4.0)); - } - else - { - testing::ProcessTest("Testing archived BinaryConvolutionalLayer (gemm) (mean scaling), values", Equals(archivedOutput[0], 8.22222) && Equals(archivedOutput[1], 6.44444) && Equals(archivedOutput[2], 8.22222) && Equals(archivedOutput[3], 6.44444)); - } -} - -template -void BinaryConvolutionalLayerGemmTest() -{ - BinaryConvolutionalLayerGemmTest(ell::predictors::neural::BinaryWeightsScale::mean); - BinaryConvolutionalLayerGemmTest(ell::predictors::neural::BinaryWeightsScale::none); -} - -template -void BinaryConvolutionalLayerBitwiseTest(ell::predictors::neural::BinaryWeightsScale scale) -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using Shape = typename Layer::Shape; - using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; - - // Verify BinaryConvolutionalLayer with gemm method - TensorType input(3, 4, 2); // Input includes padding - input.Fill(-1); - input(1, 1, 0) = 2; - input(1, 2, 0) = 1; - input(1, 1, 1) = 3; - input(1, 2, 1) = 2; - Shape outputShape = { 1, 2, 2 }; // Output has no padding - LayerParameters parameters{ input.GetReference(), MinusOnePadding(1), outputShape, NoPadding() }; - BinaryConvolutionalParameters convolutionalParams{ 3, 1, BinaryConvolutionMethod::gemm, scale }; - TensorType weights(convolutionalParams.receptiveField * outputShape.NumChannels(), convolutionalParams.receptiveField, input.NumChannels()); - // clang-format off - std::vector weightsVector{ // RowMajor then depth order - 1, 3, 2, 3, 1, 1, 2, 3, 1, - 2, 4, 1, 3, 1, 2, 1, 4, 2, - 1, 2, 1, 2, 3, 2, 1, 2, 1, - 0, 3, 2, 3, 1, 2, 1, 0, 2 }; - // clang-format on - size_t vectorIndex = 0; - for (size_t f = 0; f < outputShape.NumChannels(); f++) - { - for (size_t k = 0; k < input.NumChannels(); k++) - { - for (size_t i = 0; i < convolutionalParams.receptiveField; i++) - { - for (size_t j = 0; j < convolutionalParams.receptiveField; j++) - { - weights(f * convolutionalParams.receptiveField + i, j, k) = weightsVector[vectorIndex++]; - } - } - } - } - - // Verify BinaryConvolutionalLayer with bitwise method. Since we're doing bitwise operations, change the padding scheme to be zeros. 
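// A minimal sketch (hypothetical helper, not part of the original test) of the XNOR/popcount
// trick the bitwise method is built on: once inputs and weights are binarized to {-1, +1}
// signs and packed into 64-bit words, the dot product of two sign vectors reduces to
// bits - 2 * popcount(x XOR w). Assumes <cstdint> and the GCC/Clang popcount builtin.
auto binaryDot = [](std::uint64_t xBits, std::uint64_t wBits, int numBits) -> int {
    int mismatches = __builtin_popcountll(xBits ^ wBits); // positions where the signs differ
    return numBits - 2 * mismatches;                      // (+1 per match) + (-1 per mismatch)
};
(void)binaryDot; // illustration only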
-    convolutionalParams.method = BinaryConvolutionMethod::bitwise;
-    parameters.inputPaddingParameters.paddingScheme = PaddingScheme::zeros;
-    input.Fill(0);
-    input(1, 1, 0) = 2;
-    input(1, 2, 0) = 1;
-    input(1, 1, 1) = 3;
-    input(1, 2, 1) = 2;
-
-    BinaryConvolutionalLayer<ElementType> convolutionalLayer(parameters, convolutionalParams, weights);
-    convolutionalLayer.Compute();
-    auto output = convolutionalLayer.GetOutput();
-    if (scale == ell::predictors::neural::BinaryWeightsScale::none)
-    {
-        testing::ProcessTest("Testing BinaryConvolutionalLayer (bitwise) (no scaling), values", Equals(output(0, 0, 0), 4.0) && Equals(output(0, 0, 1), 4.0) && Equals(output(0, 1, 0), 4.0) && Equals(output(0, 1, 1), 4.0));
-    }
-    else
-    {
-        testing::ProcessTest("Testing BinaryConvolutionalLayer (bitwise) (mean scaling), values", Equals(output(0, 0, 0), 8.22222) && Equals(output(0, 0, 1), 6.44444) && Equals(output(0, 1, 0), 8.22222) && Equals(output(0, 1, 1), 6.44444));
-    }
-
-    // Put the layer in a network so we can archive it
-    using InputParameters = typename InputLayer<ElementType>::InputParameters;
-    InputParameters inputParams = { { 1, 2, 2 }, { PaddingScheme::zeros, 0 }, { 3, 4, 2 }, { PaddingScheme::zeros, 0 }, 1 };
-    auto inputLayer = std::make_unique<InputLayer<ElementType>>(inputParams);
-    typename NeuralNetworkPredictor<ElementType>::Layers layers;
-    layers.push_back(std::unique_ptr<Layer<ElementType>>(new BinaryConvolutionalLayer<ElementType>(parameters, convolutionalParams, weights)));
-    NeuralNetworkPredictor<ElementType> neuralNetwork(std::move(inputLayer), std::move(layers));
-
-    // archive the network
-    utilities::SerializationContext context;
-    NeuralNetworkPredictor<ElementType>::RegisterNeuralNetworkPredictorTypes(context);
-    RegisterNodeTypes(context);
-    std::stringstream strstream;
-    utilities::JsonArchiver archiver(strstream);
-    archiver << neuralNetwork;
-
-    // unarchive the network
-    utilities::JsonUnarchiver unarchiver(strstream, context);
-    NeuralNetworkPredictor<ElementType> archivedNetwork;
-    unarchiver >> archivedNetwork;
-
-    auto archivedOutput = neuralNetwork.Predict(DataVectorType{ 2, 1, 3, 2 });
-    if (scale == ell::predictors::neural::BinaryWeightsScale::none)
-    {
-        testing::ProcessTest("Testing archived BinaryConvolutionalLayer (bitwise) (no scaling), values", Equals(archivedOutput[0], 4.0) && Equals(archivedOutput[1], 4.0) && Equals(archivedOutput[2], 4.0) && Equals(archivedOutput[3], 4.0));
-    }
-    else
-    {
-        testing::ProcessTest("Testing archived BinaryConvolutionalLayer (bitwise) (mean scaling), values", Equals(archivedOutput[0], 8.22222) && Equals(archivedOutput[1], 6.44444) && Equals(archivedOutput[2], 8.22222) && Equals(archivedOutput[3], 6.44444));
-    }
-}
-
-template <typename ElementType>
-void BinaryConvolutionalLayerBitwiseTest()
-{
-    BinaryConvolutionalLayerBitwiseTest<ElementType>(ell::predictors::neural::BinaryWeightsScale::mean);
-    BinaryConvolutionalLayerBitwiseTest<ElementType>(ell::predictors::neural::BinaryWeightsScale::none);
-}
-
-template <typename ElementType>
-void SoftmaxLayerTest()
-{
-    using namespace ell::predictors;
-    using namespace ell::predictors::neural;
-    using LayerParameters = typename Layer<ElementType>::LayerParameters;
-    using TensorType = typename Layer<ElementType>::TensorType;
-    using Shape = typename Layer<ElementType>::Shape;
-
-    // Verify SoftmaxLayer
-    TensorType input(1, 1, 3);
-    input(0, 0, 0) = 1;
-    input(0, 0, 1) = 2;
-    input(0, 0, 2) = 3;
-    Shape outputShape = { 3, 3, 3 };
-    LayerParameters parameters{ input, NoPadding(), outputShape, ZeroPadding(1) };
-
-    SoftmaxLayer<ElementType> softmaxLayer(parameters);
-    softmaxLayer.Compute();
-    auto output = softmaxLayer.GetOutput();
-    testing::ProcessTest("Testing SoftmaxLayer, values", Equals(output(1, 1, 0), 0.0900305733) && Equals(output(1,
1, 1), 0.244728476) && Equals(output(1, 1, 2), 0.665240943)); - testing::ProcessTest("Testing SoftmaxLayer, padding", output(0, 0, 0) == 0 && output(0, 1, 0) == 0 && output(2, 2, 0) == 0 && output(2, 2, 1) == 0); -} - -template -void NeuralNetworkPredictorTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using InputParameters = typename InputLayer::InputParameters; - using LayerParameters = typename Layer::LayerParameters; - using VectorType = typename Layer::VectorType; - using MatrixType = typename Layer::MatrixType; - using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; - - // Build an XOR net from previously trained values. - typename NeuralNetworkPredictor::InputLayerReference inputLayer; - typename NeuralNetworkPredictor::Layers layers; - - InputParameters inputParams = { { 1, 1, 2 }, { PaddingScheme::zeros, 0 }, { 1, 1, 2 }, { PaddingScheme::zeros, 0 }, 1 }; - inputLayer = std::make_unique>(inputParams); - - LayerParameters layerParameters{ inputLayer->GetOutput(), NoPadding(), { 1, 1, 3 }, NoPadding() }; - MatrixType weights1(3, 2); - weights1(0, 0) = -0.97461396f; - weights1(0, 1) = 1.40845299f; - weights1(1, 0) = -0.14135513f; - weights1(1, 1) = -0.54136097f; - weights1(2, 0) = 0.99313086f; - weights1(2, 1) = -0.99083692f; - layers.push_back(std::unique_ptr>(new FullyConnectedLayer(layerParameters, weights1))); - - layerParameters = { layers[0]->GetOutput(), NoPadding(), { 1, 1, 3 }, NoPadding() }; - VectorType bias1({ -0.43837756f, -0.90868396f, -0.0323102f }); - layers.push_back(std::unique_ptr>(new BiasLayer(layerParameters, bias1))); - - layerParameters = { layers[1]->GetOutput(), NoPadding(), { 1, 1, 3 }, NoPadding() }; - layers.push_back(std::unique_ptr>(new ActivationLayer(layerParameters, new ReLUActivation()))); - - layerParameters = { layers[2]->GetOutput(), NoPadding(), { 1, 1, 1 }, NoPadding() }; - MatrixType weights2(1, 3); - weights2(0, 0) = 1.03084767f; - weights2(0, 1) = -0.10772263f; - weights2(0, 2) = 1.04077697f; - layers.push_back(std::unique_ptr>(new FullyConnectedLayer(layerParameters, weights2))); - - layerParameters = { layers[3]->GetOutput(), NoPadding(), { 1, 1, 1 }, NoPadding() }; - VectorType bias2({ 1.40129846e-20f }); - layers.push_back(std::unique_ptr>(new BiasLayer(layerParameters, bias2))); - - NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); - std::vector output; - - // Check the result for the 4 permutations of input. This validates that: - // - the weights loaded correctly. 
- // - the operations in each layer are working correctly - // - the feed forward logic is working correctly - - output = neuralNetwork.Predict(DataVectorType({ 0, 0 })); - testing::ProcessTest("Testing NeuralNetworkPredictor, Predict of XOR net for 0 0 ", Equals(output[0], 0.0)); - - output = neuralNetwork.Predict(DataVectorType({ 0, 1 })); - testing::ProcessTest("Testing NeuralNetworkPredictor, Predict of XOR net for 0 1 ", Equals(output[0], 1.0)); - - output = neuralNetwork.Predict(DataVectorType({ 1, 0 })); - testing::ProcessTest("Testing NeuralNetworkPredictor, Predict of XOR net for 1 0 ", Equals(output[0], 1.0)); - - output = neuralNetwork.Predict(DataVectorType({ 1, 1 })); - testing::ProcessTest("Testing NeuralNetworkPredictor, Predict of XOR net for 1 1 ", Equals(output[0], 0.0)); - - // Verify that we can archive and unarchive the predictor - utilities::SerializationContext context; - NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); - RegisterNodeTypes(context); - std::stringstream strstream; - utilities::JsonArchiver archiver(strstream); - archiver << neuralNetwork; - utilities::JsonUnarchiver unarchiver(strstream, context); - - NeuralNetworkPredictor neuralNetwork2; - unarchiver >> neuralNetwork2; - - output = neuralNetwork2.Predict(DataVectorType({ 0, 0 })); - testing::ProcessTest("Testing NeuralNetworkPredictor from archive, Predict of XOR net for 0 0 ", Equals(output[0], 0.0)); - - output = neuralNetwork2.Predict(DataVectorType({ 0, 1 })); - testing::ProcessTest("Testing NeuralNetworkPredictor from archive, Predict of XOR net for 0 1 ", Equals(output[0], 1.0)); - - output = neuralNetwork2.Predict(DataVectorType({ 1, 0 })); - testing::ProcessTest("Testing NeuralNetworkPredictor from archive, Predict of XOR net for 1 0 ", Equals(output[0], 1.0)); - - output = neuralNetwork2.Predict(DataVectorType({ 1, 1 })); - testing::ProcessTest("Testing NeuralNetworkPredictor from archive, Predict of XOR net for 1 1 ", Equals(output[0], 0.0)); - - // Remove the last 2 layers, (Dense and Bias) - neuralNetwork2.RemoveLastLayers(2); - output = neuralNetwork2.Predict(DataVectorType({ 0, 1 })); - testing::ProcessTest("Testing cut NeuralNetworkPredictor, predict for 0 1 ", Equals(output[0], 0.970072031) && Equals(output[1], 0.0) && Equals(output[2], 0.0)); -} - -template -void FillTensor(ell::math::ChannelColumnRowTensor& tensor, int startValue = 0) -{ - int val = startValue; - tensor.Generate([&val]() { return val++; }); -} - -template -void FillVector(ell::math::ColumnVector& vector, int startValue = 0) -{ - int val = startValue; - vector.Generate([&val]() { return val++; }); -} - -template -void ConvolutionalArchiveTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using InputParameters = typename InputLayer::InputParameters; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; - - // Build a net - typename NeuralNetworkPredictor::InputLayerReference inputLayer; - typename NeuralNetworkPredictor::Layers layers; - - InputParameters inputParams = { { 3, 3, 3 }, { PaddingScheme::zeros, 0 }, { 5, 5, 3 }, { PaddingScheme::zeros, 1 }, 1 }; - inputLayer = std::make_unique>(inputParams); - - LayerParameters layerParameters{ inputLayer->GetOutput(), { PaddingScheme::zeros, 1 }, { 3, 3, 8 }, NoPadding() }; - auto convolutionMethod = ConvolutionMethod::unrolled; - ConvolutionalParameters convolutionalParams{ 3, 1, 
convolutionMethod, 1 }; - TensorType convWeights1(8 * 3, 3, 3); - FillTensor(convWeights1); - layers.push_back(std::unique_ptr>(new ConvolutionalLayer(layerParameters, convolutionalParams, convWeights1))); - - NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); - std::vector input(3 * 3 * 3); - int val = 0; - std::generate(input.begin(), input.end(), [&val]() { return val++; }); - - utilities::SerializationContext context; - NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); - RegisterNodeTypes(context); - std::stringstream strstream; - utilities::JsonArchiver archiver(strstream); - archiver << neuralNetwork; - - utilities::JsonUnarchiver unarchiver(strstream, context); - NeuralNetworkPredictor neuralNetwork2; - unarchiver >> neuralNetwork2; - auto output = neuralNetwork.Predict(DataVectorType(input)); - auto output2 = neuralNetwork2.Predict(DataVectorType(input)); - testing::ProcessTest("Testing Convolutional predictor from archive", testing::IsEqual(output, output2)); -} - -template -void BinaryConvolutionalArchiveTest() -{ - using namespace ell::predictors; - using namespace ell::predictors::neural; - using InputParameters = typename InputLayer::InputParameters; - using LayerParameters = typename Layer::LayerParameters; - using TensorType = typename Layer::TensorType; - using DataVectorType = typename NeuralNetworkPredictor::DataVectorType; - - // Build a net - typename NeuralNetworkPredictor::InputLayerReference inputLayer; - typename NeuralNetworkPredictor::Layers layers; - - InputParameters inputParams = { { 3, 3, 3 }, { PaddingScheme::zeros, 0 }, { 5, 5, 3 }, { PaddingScheme::zeros, 1 }, 1 }; - inputLayer = std::make_unique>(inputParams); - - LayerParameters layerParameters{ inputLayer->GetOutput(), { PaddingScheme::zeros, 1 }, { 3, 3, 8 }, NoPadding() }; - BinaryConvolutionalParameters convolutionalParams{ 3, 1, BinaryConvolutionMethod::bitwise, BinaryWeightsScale::mean }; - TensorType convWeights1(8 * 3, 3, 3); - FillTensor(convWeights1); - layers.push_back(std::unique_ptr>(new BinaryConvolutionalLayer(layerParameters, convolutionalParams, convWeights1))); - - NeuralNetworkPredictor neuralNetwork(std::move(inputLayer), std::move(layers)); - std::vector input(3 * 3 * 3); - int val = 0; - std::generate(input.begin(), input.end(), [&val]() { return val++; }); - - utilities::SerializationContext context; - NeuralNetworkPredictor::RegisterNeuralNetworkPredictorTypes(context); - RegisterNodeTypes(context); - std::stringstream strstream; - utilities::JsonArchiver archiver(strstream); - archiver << neuralNetwork; - - utilities::JsonUnarchiver unarchiver(strstream, context); - NeuralNetworkPredictor neuralNetwork2; - unarchiver >> neuralNetwork2; - auto output = neuralNetwork.Predict(DataVectorType(input)); - auto output2 = neuralNetwork2.Predict(DataVectorType(input)); - testing::ProcessTest("Testing Binary convolutional predictor from archive", testing::IsEqual(output, output2)); -} diff --git a/libraries/testing/CMakeLists.txt b/libraries/testing/CMakeLists.txt index 2ec0bbac9..007d2a1ab 100644 --- a/libraries/testing/CMakeLists.txt +++ b/libraries/testing/CMakeLists.txt @@ -10,13 +10,11 @@ set(src set(include include/testing.h ) -set(tcc ) source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) -add_library(${library_name} ${src} ${include} ${tcc}) +add_library(${library_name} ${src} ${include}) target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR}) 
target_link_libraries(${library_name} utilities)
diff --git a/libraries/trainers/CMakeLists.txt b/libraries/trainers/CMakeLists.txt
index 2163cf1f6..d32944594 100644
--- a/libraries/trainers/CMakeLists.txt
+++ b/libraries/trainers/CMakeLists.txt
@@ -34,25 +34,13 @@ set (include
     include/EvaluatingTrainer.h
     include/ThresholdFinder.h
 )
-set (tcc tcc/EvaluatingTrainer.tcc
-    tcc/ForestTrainer.tcc
-    tcc/HistogramForestTrainer.tcc
-    tcc/MeanCalculator.tcc
-    tcc/ProtoNNTrainerUtils.tcc
-    tcc/SortingForestTrainer.tcc
-    tcc/SweepingTrainer.tcc
-    tcc/SDCATrainer.tcc
-    tcc/SGDTrainer.tcc
-    tcc/ThresholdFinder.tcc)
-
 set (doc doc/README.md)

 source_group("src" FILES ${src})
 source_group("include" FILES ${include})
-source_group("tcc" FILES ${tcc})
 source_group("doc" FILES ${doc})

-add_library(${library_name} ${src} ${include} ${tcc} ${doc})
+add_library(${library_name} ${src} ${include} ${doc})
 target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR})
 target_link_libraries(${library_name} utilities evaluators predictors)
diff --git a/libraries/trainers/include/EvaluatingTrainer.h b/libraries/trainers/include/EvaluatingTrainer.h
index 6760fda54..3635e3084 100644
--- a/libraries/trainers/include/EvaluatingTrainer.h
+++ b/libraries/trainers/include/EvaluatingTrainer.h
@@ -74,4 +74,46 @@ namespace trainers
 } // namespace trainers
 } // namespace ell

-#include "../tcc/EvaluatingTrainer.tcc"
+#pragma region implementation
+
+#include <cassert>
+
+namespace ell
+{
+namespace trainers
+{
+    template <typename PredictorType>
+    EvaluatingTrainer<PredictorType>::EvaluatingTrainer(
+        std::unique_ptr<ITrainer<PredictorType>>&& internalTrainer,
+        std::shared_ptr<evaluators::IEvaluator<PredictorType>> evaluator) :
+        _internalTrainer(std::move(internalTrainer)),
+        _evaluator(evaluator)
+    {
+        assert(_internalTrainer != nullptr);
+        assert(_evaluator != nullptr);
+    }
+
+    template <typename PredictorType>
+    void EvaluatingTrainer<PredictorType>::SetDataset(const data::AnyDataset& anyDataset)
+    {
+        _internalTrainer->SetDataset(anyDataset);
+    }
+
+    template <typename PredictorType>
+    void EvaluatingTrainer<PredictorType>::Update()
+    {
+        _internalTrainer->Update();
+        _evaluator->Evaluate(_internalTrainer->GetPredictor());
+    }
+
+    template <typename PredictorType>
+    EvaluatingTrainer<PredictorType> MakeEvaluatingTrainer(
+        std::unique_ptr<ITrainer<PredictorType>>&& internalTrainer,
+        std::shared_ptr<evaluators::IEvaluator<PredictorType>> evaluator)
+    {
+        return EvaluatingTrainer<PredictorType>(std::move(internalTrainer), evaluator);
+    }
+} // namespace trainers
+} // namespace ell
+
+#pragma endregion implementation
diff --git a/libraries/trainers/include/ForestTrainer.h b/libraries/trainers/include/ForestTrainer.h
index d2d87b8d8..84d60790e 100644
--- a/libraries/trainers/include/ForestTrainer.h
+++ b/libraries/trainers/include/ForestTrainer.h
@@ -208,4 +208,228 @@ namespace trainers
 } // namespace trainers
 } // namespace ell

-#include "../tcc/ForestTrainer.tcc"
+#pragma region implementation
+
+//#define VERBOSE_MODE( x ) x // uncomment this for very verbose mode
+#define VERBOSE_MODE(x) // keep this line for non-verbose mode
+
+namespace ell
+{
+namespace trainers
+{
+    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
+    ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::ForestTrainer(const BoosterType& booster, const ForestTrainerParameters& parameters) :
+        _booster(booster),
+        _parameters(parameters),
+        _forest()
+    {
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
+    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SetDataset(const data::AnyDataset& anyDataset)
+    {
+        // materialize a dataset of dense DataVectors whose metadata contains both the strong and the weak weight and label for each example
+        _dataset = data::Dataset(anyDataset);
+
+        // initialize the special fields in the dataset metadata: weak weight and label, currentOutput
+        for (size_t rowIndex = 0; rowIndex < _dataset.NumExamples(); ++rowIndex)
+        {
+            auto& example = _dataset[rowIndex];
+            auto prediction = _forest.Predict(example.GetDataVector());
+            auto& metadata = example.GetMetadata();
+            metadata.currentOutput = prediction;
+            metadata.weak = _booster.GetWeakWeightLabel(metadata.strong, prediction);
+        }
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
+    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::Update()
+    {
+        // boosting loop (outer loop)
+        for (size_t round = 0; round < _parameters.numRounds; ++round)
+        {
+            // call the booster and compute sums for the entire data set
+            Sums sums = SetWeakWeightsLabels();
+
+            // use the computed sums to calculate the bias term, set it in the forest and the data set
+            double bias = sums.GetMeanLabel();
+            _forest.AddToBias(bias);
+            UpdateCurrentOutputs(bias);
+
+            VERBOSE_MODE(_dataset.Print(std::cout));
+            VERBOSE_MODE(std::cout << "\nBoosting iteration\n");
+            VERBOSE_MODE(_forest.PrintLine(std::cout, 1));
+
+            // find a split candidate for the root node and push it onto the priority queue
+            auto rootSplit = GetBestSplitRuleAtNode(_forest.GetNewRootId(), Range{ 0, _dataset.NumExamples() }, sums);
+
+            // check for positive gain
+            if (rootSplit.gain < _parameters.minSplitGain || _parameters.maxSplitsPerRound == 0)
+            {
+                return;
+            }
+
+            // reset the queue and add the root split
+            if (_queue.size() > 0)
+            {
+                _queue = SplitCandidatePriorityQueue();
+            }
+            _queue.push(std::move(rootSplit));
+
+            // start performing splits until the maximum is reached or the queue is empty
+            PerformSplits(_parameters.maxSplitsPerRound);
+        }
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
+    ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SplitCandidate::SplitCandidate(SplittableNodeId nodeId, Range totalRange, Sums totalSums) :
+        gain(0),
+        nodeId(nodeId),
+        stats(totalSums),
+        ranges(totalRange)
+    {
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
+    auto ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SetWeakWeightsLabels() -> Sums
+    {
+        Sums sums;
+
+        for (size_t rowIndex = 0; rowIndex < _dataset.NumExamples(); ++rowIndex)
+        {
+            auto& metadata = _dataset[rowIndex].GetMetadata();
+            metadata.weak = _booster.GetWeakWeightLabel(metadata.strong, metadata.currentOutput);
+            sums.Increment(metadata.weak);
+        }
+
+        if (sums.sumWeights == 0.0)
+        {
+            throw utilities::InputException(utilities::InputExceptionErrors::badData, "sum of weights in data is zero");
+        }
+
+        return sums;
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
+    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::UpdateCurrentOutputs(double value)
+    {
+        for (size_t rowIndex = 0; rowIndex < _dataset.NumExamples(); ++rowIndex)
+        {
+            auto& example = _dataset[rowIndex];
+            example.GetMetadata().currentOutput += value;
+        }
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
+    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::UpdateCurrentOutputs(Range range, const EdgePredictorType& edgePredictor)
+    {
+        for (size_t rowIndex = range.firstIndex; rowIndex < range.firstIndex + range.size; ++rowIndex)
+        {
+            auto& example = _dataset[rowIndex];
+            example.GetMetadata().currentOutput += edgePredictor.Predict(example.GetDataVector());
+        }
+    }
+
+    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
+    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::PerformSplits(size_t maxSplits)
+    {
+        // count splits
+        size_t splitCount = 0;
+
+        // splitting loop (inner loop)
+        while (!_queue.empty())
+        {
+            VERBOSE_MODE(std::cout << "\nSplit iteration\n");
+            VERBOSE_MODE(_queue.PrintLine(std::cout, 1));
+
+            auto splitCandidate = _queue.top();
+            _queue.pop();
+
+            const auto& stats = splitCandidate.stats;
+            const auto& ranges = splitCandidate.ranges;
+
+            // sort the data according to the performed split and update the metadata to reflect this change
+            SortNodeDataset(ranges.GetTotalRange(), splitCandidate.splitRule);
+
+            // update the currentOutput field in the metadata
+            auto edgePredictors = GetEdgePredictors(stats);
+            for (size_t i = 0; i < splitCandidate.splitRule.NumOutputs(); ++i)
+            {
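+                // output i of the split rule owns child range i: every example that sorted into
+                // that range gets edge predictor i's output added to its running currentOutput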
UpdateCurrentOutputs(ranges.GetChildRange(i), edgePredictors[i]); + } + + // have the forest perform the split + using SplitAction = predictors::SimpleForestPredictor::SplitAction; + SplitAction splitAction(splitCandidate.nodeId, splitCandidate.splitRule, edgePredictors); + auto interiorNodeIndex = _forest.Split(splitAction); + + VERBOSE_MODE(_dataset.Print(std::cout, 1)); + VERBOSE_MODE(std::cout << "\n"); + VERBOSE_MODE(_forest.PrintLine(std::cout, 1)); + + // if max number of splits reached, exit the loop + if (++splitCount >= maxSplits) + { + break; + } + + // queue new split candidates + for (size_t i = 0; i < splitCandidate.splitRule.NumOutputs(); ++i) + { + auto splitCandidate = GetBestSplitRuleAtNode(_forest.GetChildId(interiorNodeIndex, i), ranges.GetChildRange(i), stats.GetChildSums(i)); + if (splitCandidate.gain > _parameters.minSplitGain) + { + _queue.push(std::move(splitCandidate)); + } + } + } + } + + template + void ForestTrainer::SortNodeDataset(Range range, const SplitRuleType& splitRule) + { + if (splitRule.NumOutputs() == 2) + { + _dataset.Partition([splitRule](const data::Example& example) { return splitRule.Predict(example.GetDataVector()) == 0; }, + range.firstIndex, + range.size); + } + else + { + _dataset.Sort([splitRule](const data::Example& example) { return splitRule.Predict(example.GetDataVector()); }, + range.firstIndex, + range.size); + } + } + + // + // debugging code + // + + template + void ForestTrainer::SplitCandidatePriorityQueue::PrintLine(std::ostream& os, size_t tabs) const + { + os << std::string(tabs * 4, ' ') << "Priority Queue Size: " << size() << "\n"; + + for (const auto& candidate : std::priority_queue::c) // c is a protected member of std::priority_queue + { + os << "\n"; + candidate.PrintLine(os, tabs + 1); + os << "\n"; + } + } + + template + void ForestTrainer::SplitCandidate::PrintLine(std::ostream& os, size_t tabs) const + { + os << std::string(tabs * 4, ' ') << "gain = " << gain << "\n"; + os << std::string(tabs * 4, ' ') << "node = "; + nodeId.Print(os); + os << "\n"; + splitRule.PrintLine(os, tabs); + stats.PrintLine(os, tabs); + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/include/HistogramForestTrainer.h b/libraries/trainers/include/HistogramForestTrainer.h index 0822b84d0..955dcf03a 100644 --- a/libraries/trainers/include/HistogramForestTrainer.h +++ b/libraries/trainers/include/HistogramForestTrainer.h @@ -90,4 +90,115 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/HistogramForestTrainer.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace trainers +{ + template + HistogramForestTrainer::HistogramForestTrainer(const LossFunctionType& lossFunction, const BoosterType& booster, const ThresholdFinderType& thresholdFinder, const HistogramForestTrainerParameters& parameters) : + ForestTrainer(booster, parameters), + _lossFunction(lossFunction), + _thresholdFinder(thresholdFinder), + _random(utilities::GetRandomEngine(parameters.randomSeed)), + _thresholdFinderSampleSize(parameters.thresholdFinderSampleSize), + _candidatesPerInput(parameters.candidatesPerInput) + { + } + + template + auto HistogramForestTrainer::GetBestSplitRuleAtNode(SplittableNodeId nodeId, Range range, Sums sums) -> SplitCandidate + { + SplitCandidate bestSplitCandidate(nodeId, range, sums); + + auto splitRuleCandidates = CallThresholdFinder(range); + + for (const auto& splitRuleCandidate : splitRuleCandidates) + { + Sums sums0; + size_t 
size0; + + std::tie(sums0, size0) = EvaluateSplitRule(splitRuleCandidate, range); + + Sums sums1 = sums - sums0; + double gain = CalculateGain(sums, sums0, sums1); + + // find gain maximizer + if (gain > bestSplitCandidate.gain) + { + bestSplitCandidate.gain = gain; + bestSplitCandidate.splitRule = splitRuleCandidate; + bestSplitCandidate.ranges.SplitChildRange(0, size0); + bestSplitCandidate.stats.SetChildSums({ sums0, sums1 }); + } + } + + return bestSplitCandidate; + } + + template + auto HistogramForestTrainer::GetEdgePredictors(const NodeStats& nodeStats) -> std::vector + { + double output = nodeStats.GetTotalSums().GetMeanLabel(); + double output0 = nodeStats.GetChildSums(0).GetMeanLabel() - output; + double output1 = nodeStats.GetChildSums(1).GetMeanLabel() - output; + return std::vector{ output0, output1 }; + } + + template + double HistogramForestTrainer::CalculateGain(const Sums& sums, const Sums& sums0, const Sums& sums1) const + { + if (sums0.sumWeights == 0 || sums1.sumWeights == 0) + { + return 0; + } + + return sums0.sumWeights * _lossFunction.BregmanGenerator(sums0.sumWeightedLabels / sums0.sumWeights) + + sums1.sumWeights * _lossFunction.BregmanGenerator(sums1.sumWeightedLabels / sums1.sumWeights) - + sums.sumWeights * _lossFunction.BregmanGenerator(sums.sumWeightedLabels / sums.sumWeights); + } + + template + auto HistogramForestTrainer::CallThresholdFinder(Range range) -> std::vector + { + // uniformly choose _candidatesPerInput from the range, without replacement + _dataset.RandomPermute(_random, range.firstIndex, range.size, _thresholdFinderSampleSize); + + auto thresholds = _thresholdFinder.GetThresholds(_dataset.GetExampleReferenceIterator(range.firstIndex, _thresholdFinderSampleSize)); + return thresholds; + } + + template + auto HistogramForestTrainer::EvaluateSplitRule(const SplitRuleType& splitRule, const Range& range) const -> std::tuple + { + Sums sums0; + size_t size0 = 0; + + auto exampleIterator = _dataset.GetExampleIterator(range.firstIndex, range.size); + while (exampleIterator.IsValid()) + { + const auto& example = exampleIterator.Get(); + auto prediction = splitRule.Predict(example.GetDataVector()); + if (prediction == 0) + { + sums0.Increment(example.GetMetadata().weak); + ++size0; + } + exampleIterator.Next(); + } + + return std::make_tuple(sums0, size0); + }; + + template + std::unique_ptr> MakeHistogramForestTrainer(const LossFunctionType& lossFunction, const BoosterType& booster, const ThresholdFinderType& thresholdFinder, const HistogramForestTrainerParameters& parameters) + { + return std::make_unique>(lossFunction, booster, thresholdFinder, parameters); + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/include/MeanCalculator.h b/libraries/trainers/include/MeanCalculator.h index c7a9b9fce..2ab4874f7 100644 --- a/libraries/trainers/include/MeanCalculator.h +++ b/libraries/trainers/include/MeanCalculator.h @@ -56,4 +56,53 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/MeanCalculator.tcc" \ No newline at end of file +#pragma region implementation + +#include + +namespace ell +{ +namespace trainers +{ + template + math::RowVector CalculateTransformedMean(const data::AnyDataset& anyDataset, TransformationType transformation) + { + // get example iterator + auto exampleIterator = anyDataset.GetExampleIterator(); + + math::RowVector result; + size_t count = 0; + while (exampleIterator.IsValid()) + { + const auto& x = 
exampleIterator.Get().GetDataVector(); + if (x.PrefixLength() > result.Size()) + { + result.Resize(x.PrefixLength()); + } + + result += data::MakeTransformedDataVector(x, transformation); + ++count; + exampleIterator.Next(); + } + + double scale = 1.0 / count; + result.Transform([scale](double x) { return scale * x; }); + + return result; + } + + template + math::RowVector CalculateSparseTransformedMean(const data::AnyDataset& anyDataset, TransformationType transformation) + { + return CalculateTransformedMean(anyDataset, transformation); + } + + template + math::RowVector CalculateDenseTransformedMean(const data::AnyDataset& anyDataset, TransformationType transformation) + { + return CalculateTransformedMean(anyDataset, transformation); + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/include/ProtoNNTrainerUtils.h b/libraries/trainers/include/ProtoNNTrainerUtils.h index 95d210e62..ab3345170 100644 --- a/libraries/trainers/include/ProtoNNTrainerUtils.h +++ b/libraries/trainers/include/ProtoNNTrainerUtils.h @@ -44,4 +44,128 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/ProtoNNTrainerUtils.tcc" +#pragma region implementation + +#pragma once + +#include "ProtoNNTrainerUtils.h" + +#include +#include + +#include +#include + +#include +#include + +namespace ell +{ +namespace trainers +{ + void ProtoNNTrainerUtils::GetDatasetAsMatrix(const data::AutoSupervisedDataset& anyDataset, math::MatrixReference X, math::MatrixReference Y) + { + auto exampleIterator = anyDataset.GetExampleIterator(); + int colIdx = 0; + while (exampleIterator.IsValid()) + { + // get the Next example + const auto& example = exampleIterator.Get(); + double label = example.GetMetadata().label; + const auto& dataVector = example.GetDataVector().ToArray(); + + for (size_t j = 0; j < dataVector.size(); j++) + { + X(j, colIdx) = dataVector[j]; + } + + for (size_t i = 0; i < Y.NumRows(); i++) + { + if (i == label) + Y((size_t)i, colIdx) = 1; + else + Y((size_t)i, colIdx) = 0; + } + + colIdx += 1; + exampleIterator.Next(); + } + } + + template + math::Matrix ProtoNNTrainerUtils::MatrixExp(math::ConstMatrixReference A) + { + auto m = A.NumRows(); + auto n = A.NumColumns(); + math::Matrix R(m, n); + for (size_t i = 0; i < m; i++) + { + for (size_t j = 0; j < n; j++) + { + R(i, j) = std::exp(A(i, j)); + } + } + + return R; + } + + template + double ProtoNNTrainerUtils::MatrixNorm(math::ConstMatrixReference A) + { + double norm = 0.0; + for (size_t i = 0; i < A.NumColumns(); i++) + { + for (size_t j = 0; j < A.NumRows(); j++) + { + norm += A(j, i) * A(j, i); + } + } + + norm = sqrt(norm); + return norm; + } + + template + double ProtoNNTrainerUtils::MaxAbsoluteElement(math::ConstMatrixReference A) + { + double max = A(0, 0); + auto m = A.NumRows(); + auto n = A.NumColumns(); + for (size_t i = 0; i < m; i++) + { + for (size_t j = 0; j < n; j++) + { + max = std::max(max, std::abs(A(i, j))); + } + } + + return max; + } + + void ProtoNNTrainerUtils::HardThresholding(math::MatrixReference M, double sparsity) + { + assert(sparsity >= 0.0 && sparsity <= 1.0); + if (sparsity >= 0.999) + return; + + const double eps = 1e-8; + + std::vector data; + data.assign(M.GetDataPointer(), M.GetDataPointer() + (size_t)(M.NumRows() * M.NumColumns())); + std::sort(data.begin(), data.end(), [](double i, double j) { return std::abs(i) > std::abs(j); }); + + size_t mat_size = M.NumRows() * M.NumColumns(); + + double thresh = std::abs(data[(size_t)((sparsity 
* mat_size) - 1)]); + if (thresh <= eps) + thresh = eps; + + for (size_t i = 0; i < M.NumColumns(); i++) + { + M.GetColumn(i).Transform([thresh](double x) { return (std::abs(x) < thresh ? 0.0 : x); }); + } + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/include/SDCATrainer.h b/libraries/trainers/include/SDCATrainer.h index 0b6b697ed..8207cc846 100644 --- a/libraries/trainers/include/SDCATrainer.h +++ b/libraries/trainers/include/SDCATrainer.h @@ -128,4 +128,139 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SDCATrainer.tcc" +#pragma region implementation + +#include + +#include + +namespace ell +{ +namespace trainers +{ + template + SDCATrainer::SDCATrainer(const LossFunctionType& lossFunction, const RegularizerType& regularizer, const SDCATrainerParameters& parameters) : + _lossFunction(lossFunction), + _regularizer(regularizer), + _parameters(parameters) + { + _random = utilities::GetRandomEngine(parameters.randomSeedString); + } + + template + void SDCATrainer::SetDataset(const data::AnyDataset& anyDataset) + { + DEBUG_THROW(_v.Norm0() != 0, utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "can only call SetDataset before updates")); + + _dataset = data::Dataset(anyDataset); + auto numExamples = _dataset.NumExamples(); + _inverseScaledRegularization = 1.0 / (numExamples * _parameters.regularization); + + _predictorInfo.primalObjective = 0; + _predictorInfo.dualObjective = 0; + + // precompute the norm of each example + for (size_t rowIndex = 0; rowIndex < numExamples; ++rowIndex) + { + auto& example = _dataset[rowIndex]; + example.GetMetadata().norm2Squared = example.GetDataVector().Norm2Squared(); + + auto label = example.GetMetadata().weightLabel.label; + _predictorInfo.primalObjective += _lossFunction(0, label) / numExamples; + } + } + + template + void SDCATrainer::Update() + { + if (_parameters.permute) + { + _dataset.RandomPermute(_random); + } + + // Iterate + for (size_t i = 0; i < _dataset.NumExamples(); ++i) + { + Step(_dataset[i]); + } + + // Finish + ComputeObjectives(); + } + + template + SDCATrainer::TrainerMetadata::TrainerMetadata(const data::WeightLabel& original) : + weightLabel(original) + {} + + template + void SDCATrainer::Step(TrainerExampleType& example) + { + const auto& dataVector = example.GetDataVector(); + ResizeTo(dataVector); + + auto weightLabel = example.GetMetadata().weightLabel; + auto norm2Squared = example.GetMetadata().norm2Squared + 1; // add one because of bias term + auto lipschitz = norm2Squared * _inverseScaledRegularization; + auto dual = example.GetMetadata().dualVariable; + + if (lipschitz > 0) + { + auto prediction = _predictor.Predict(dataVector); + + auto newDual = _lossFunction.ConjugateProx(1.0 / lipschitz, dual + prediction / lipschitz, weightLabel.label); + auto dualDiff = newDual - dual; + + if (dualDiff != 0) + { + _v.Transpose() += (-dualDiff * _inverseScaledRegularization) * dataVector; + _d += (-dualDiff * _inverseScaledRegularization); + _regularizer.ConjugateGradient(_v, _d, _predictor.GetWeights(), _predictor.GetBias()); + example.GetMetadata().dualVariable = newDual; + } + } + } + + template + void SDCATrainer::ComputeObjectives() + { + double invSize = 1.0 / _dataset.NumExamples(); + + _predictorInfo.primalObjective = 0; + _predictorInfo.dualObjective = 0; + + for (size_t i = 0; i < _dataset.NumExamples(); ++i) + { + const auto& example = _dataset.GetExample(i); + auto label = 
example.GetMetadata().weightLabel.label; + auto prediction = _predictor.Predict(example.GetDataVector()); + auto dualVariable = example.GetMetadata().dualVariable; + + _predictorInfo.primalObjective += invSize * _lossFunction(prediction, label); + _predictorInfo.dualObjective -= invSize * _lossFunction.Conjugate(dualVariable, label); + } + + _predictorInfo.primalObjective += _parameters.regularization * _regularizer(_predictor.GetWeights(), _predictor.GetBias()); + _predictorInfo.dualObjective -= _parameters.regularization * _regularizer.Conjugate(_v, _d); + } + + template + void SDCATrainer::ResizeTo(const data::AutoDataVector& x) + { + auto xSize = x.PrefixLength(); + if (xSize > _predictor.Size()) + { + _predictor.Resize(xSize); + _v.Resize(xSize); + } + } + + template + std::unique_ptr>> MakeSDCATrainer(const LossFunctionType& lossFunction, const RegularizerType& regularizer, const SDCATrainerParameters& parameters) + { + return std::make_unique>(lossFunction, regularizer, parameters); + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/include/SGDTrainer.h b/libraries/trainers/include/SGDTrainer.h index e0f7626da..19439e4a7 100644 --- a/libraries/trainers/include/SGDTrainer.h +++ b/libraries/trainers/include/SGDTrainer.h @@ -258,4 +258,300 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SGDTrainer.tcc" +#pragma region implementation + +#include + +#include +#include +#include + +#include + +namespace ell +{ +namespace trainers +{ + // the code in this file follows the notation and pseudocode in https://arxiv.org/abs/1612.09147 + + // + // SGDTrainer + // + + template + SGDTrainer::SGDTrainer(const LossFunctionType& lossFunction, const SGDTrainerParameters& parameters) : + SGDTrainerBase(parameters.randomSeedString), + _lossFunction(lossFunction), + _parameters(parameters) + { + } + + template + void SGDTrainer::DoFirstStep(const data::AutoDataVector& x, double y, double weight) + { + DoNextStep(x, y, weight); + } + + template + void SGDTrainer::DoNextStep(const data::AutoDataVector& x, double y, double weight) + { + ResizeTo(x); + ++_t; + + // Predict + double p = _lastPredictor.Predict(x); + + // calculate the loss derivative + double g = weight * _lossFunction.GetDerivative(p, y); + + // get abbreviated names + auto& lastW = _lastPredictor.GetWeights(); + double& lastB = _lastPredictor.GetBias(); + + // update the (last) predictor + double scaleCoefficient = 1.0 - 1.0 / _t; + lastW *= scaleCoefficient; + lastB *= scaleCoefficient; + + const double lambda = _parameters.regularization; + double updateCoefficient = -g / (lambda * _t); + lastW.Transpose() += updateCoefficient * x; + lastB += updateCoefficient; + + // get abbreviated names + auto& averagedW = _averagedPredictor.GetWeights(); + double& averagedB = _averagedPredictor.GetBias(); + + // update the average predictor + averagedW *= scaleCoefficient; + averagedB *= scaleCoefficient; + + averagedW += 1.0 / _t * lastW; + averagedB += lastB / _t; + } + + template + void SGDTrainer::ResizeTo(const data::AutoDataVector& x) + { + auto xSize = x.PrefixLength(); + if (xSize > _lastPredictor.Size()) + { + _lastPredictor.Resize(xSize); + _averagedPredictor.Resize(xSize); + } + } + + // + // SparseDataSGDTrainer + // + + template + SparseDataSGDTrainer::SparseDataSGDTrainer(const LossFunctionType& lossFunction, const SGDTrainerParameters& parameters) : + SGDTrainerBase(parameters.randomSeedString), + _lossFunction(lossFunction), + 
_parameters(parameters) + { + } + + template + void SparseDataSGDTrainer::DoFirstStep(const data::AutoDataVector& x, double y, double weight) + { + ResizeTo(x); + _t = 1.0; + double g = weight * _lossFunction.GetDerivative(0, y); + _v.Transpose() += g * x; + _a += g; + _c = _a; + _h = 1.0; + } + + template + void SparseDataSGDTrainer::DoNextStep(const data::AutoDataVector& x, double y, double weight) + { + ResizeTo(x); + ++_t; + + // apply the predictor + const double lambda = _parameters.regularization; + double d = x * _v; + double p = -(d + _a) / (lambda * (_t - 1.0)); + + // get the derivative + double g = weight * _lossFunction.GetDerivative(p, y); + + // update + _v.Transpose() += g * x; + _a += g; + _u.Transpose() += _h * g * x; + _c += _a / _t; + _h += 1.0 / _t; + } + + template + auto SparseDataSGDTrainer::GetLastPredictor() const -> const PredictorType& + { + const double lambda = _parameters.regularization; + _lastPredictor.Resize(_v.Size()); + auto& w = _lastPredictor.GetWeights(); + + // define last predictor based on _v, _a, _t + w.Reset(); + w += (-1 / (lambda * _t)) * _v; + _lastPredictor.GetBias() = -_a / (lambda * _t); + return _lastPredictor; + } + + template + auto SparseDataSGDTrainer::GetAveragedPredictor() const -> const PredictorType& + { + const double lambda = _parameters.regularization; + _averagedPredictor.Resize(_v.Size()); + auto& w = _averagedPredictor.GetWeights(); + + // define averaged predictor based on _v, _h, _u, _t + w.Reset(); + w += -_h / (lambda * _t) * _v; + w += 1 / (lambda * _t) * _u; + + _averagedPredictor.GetBias() = -_c / (lambda * _t); + return _averagedPredictor; + } + + template + inline void SparseDataSGDTrainer::ResizeTo(const data::AutoDataVector& x) + { + auto xSize = x.PrefixLength(); + if (xSize > _v.Size()) + { + _v.Resize(xSize); + _u.Resize(xSize); + } + } + + // + // SparseDataCenteredSGDTrainer + // + + template + SparseDataCenteredSGDTrainer::SparseDataCenteredSGDTrainer(const LossFunctionType& lossFunction, math::RowVector center, const SGDTrainerParameters& parameters) : + SGDTrainerBase(parameters.randomSeedString), + _lossFunction(lossFunction), + _parameters(parameters), + _center(std::move(center)) + { + _theta = 1 + _center.Norm2Squared(); + } + + template + void SparseDataCenteredSGDTrainer::DoFirstStep(const data::AutoDataVector& x, double y, double weight) + { + ResizeTo(x); + _t = 1.0; + + // first, perform the standard SparseDataSGD step + double g = weight * _lossFunction.GetDerivative(0, y); + _v.Transpose() += g * x; + _a += g; + _c = _a; + _h = 1.0; + + // next, perform the special steps needed for centering + double q = x * _center.Transpose(); + _z = g * q; + _r = _a * _theta - _z; + _s = _r; + } + + template + void SparseDataCenteredSGDTrainer::DoNextStep(const data::AutoDataVector& x, double y, double weight) + { + ResizeTo(x); + ++_t; + + // apply the predictor + const double lambda = _parameters.regularization; + double d = x * _v; + double q = x * _center.Transpose(); + double p = -(d + _r - _a * q) / (lambda * (_t - 1.0)); + + // get the derivative + double g = weight * _lossFunction.GetDerivative(p, y); + + // apply the SparseDataSGD update + _v.Transpose() += g * x; + _a += g; + _u.Transpose() += _h * g * x; + _c += _a / _t; + _h += 1.0 / _t; + + // next, perform the special steps needed for centering + _z += g * q; + _r = _a * _theta - _z; + _s += _r / _t; + } + + template + auto SparseDataCenteredSGDTrainer::GetLastPredictor() const -> const PredictorType& + { + const double lambda = 
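SparseDataSGD above never materializes the predictor while training: it accumulates the gradient sums _v and _a (plus _u, _c, _h for the averaged variant) and reconstructs w = -_v / (lambda * t) and bias = -_a / (lambda * t) only when a predictor is requested, so each update costs time proportional to the example's nonzeros. A one-feature sketch of that bookkeeping with the square loss; names and data are illustrative.

#include <cstdio>

int main()
{
    const double lambda = 1.0;
    double v = 0, a = 0, t = 0;
    double data[][2] = { { 1.0, 1.0 }, { -2.0, -1.0 }, { 0.5, 1.0 } }; // (x, y)

    for (auto& e : data)
    {
        double x = e[0], y = e[1];
        t += 1.0;
        // prediction recovered from the sums alone, as in DoNextStep above
        double p = (t == 1.0) ? 0.0 : -(x * v + a) / (lambda * (t - 1.0));
        double g = p - y; // square-loss derivative
        v += g * x;
        a += g;
    }
    // materialize the last predictor on demand
    std::printf("w=%.4f b=%.4f\n", -v / (lambda * t), -a / (lambda * t));
}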
_parameters.regularization; + _lastPredictor.Resize(_v.Size()); + auto& w = _lastPredictor.GetWeights(); + w += (-1 / (lambda * _t)) * _v; + _lastPredictor.GetBias() = -_a / (lambda * _t); + return _lastPredictor; + } + + template + auto SparseDataCenteredSGDTrainer::GetAveragedPredictor() const -> const PredictorType& + { + const double lambda = _parameters.regularization; + const double coeff = 1.0 / (lambda * _t); + _averagedPredictor.Resize(_v.Size()); + auto& w = _averagedPredictor.GetWeights(); + + // define last predictor based on _v, _u, _c + w.Reset(); + w += -_h * coeff * _v; + w += coeff * _u; + w += _c * coeff * _center.Transpose(); + + _averagedPredictor.GetBias() = -_s * coeff; + return _averagedPredictor; + } + + template + inline void SparseDataCenteredSGDTrainer::ResizeTo(const data::AutoDataVector& x) + { + auto xSize = x.PrefixLength(); + if (xSize > _v.Size()) + { + _v.Resize(xSize); + _u.Resize(xSize); + } + } + + // + // Helper functions + // + + template + std::unique_ptr>> MakeSGDTrainer(const LossFunctionType& lossFunction, const SGDTrainerParameters& parameters) + { + return std::make_unique>(lossFunction, parameters); + } + + template + std::unique_ptr>> MakeSparseDataSGDTrainer(const LossFunctionType& lossFunction, const SGDTrainerParameters& parameters) + { + return std::make_unique>(lossFunction, parameters); + } + + template + std::unique_ptr>> MakeSparseDataCenteredSGDTrainer(const LossFunctionType& lossFunction, math::RowVector center, const SGDTrainerParameters& parameters) + { + return std::make_unique>(lossFunction, std::move(center), parameters); + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/include/SortingForestTrainer.h b/libraries/trainers/include/SortingForestTrainer.h index fa5ae33e9..c8dc86dd1 100644 --- a/libraries/trainers/include/SortingForestTrainer.h +++ b/libraries/trainers/include/SortingForestTrainer.h @@ -76,4 +76,103 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SortingForestTrainer.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + template + SortingForestTrainer::SortingForestTrainer(const LossFunctionType& lossFunction, const BoosterType& booster, const SortingForestTrainerParameters& parameters) : + ForestTrainer(booster, parameters), + _lossFunction(lossFunction) + { + } + + template + auto SortingForestTrainer::GetBestSplitRuleAtNode(SplittableNodeId nodeId, Range range, Sums sums) -> SplitCandidate + { + auto numFeatures = _dataset.NumFeatures(); + + SplitCandidate bestSplitCandidate(nodeId, range, sums); + + for (size_t inputIndex = 0; inputIndex < numFeatures; ++inputIndex) + { + // sort the relevant rows of data set in ascending order by inputIndex + SortNodeDataset(range, inputIndex); + + Sums sums0; + + // consider all thresholds + double nextFeatureValue = _dataset[range.firstIndex].GetDataVector()[inputIndex]; + for (size_t rowIndex = range.firstIndex; rowIndex < range.firstIndex + range.size - 1; ++rowIndex) + { + // get friendly names + double currentFeatureValue = nextFeatureValue; + nextFeatureValue = _dataset[rowIndex + 1].GetDataVector()[inputIndex]; + + // increment sums + sums0.Increment(_dataset[rowIndex].GetMetadata().weak); + + // only split between rows with different feature values + if (currentFeatureValue == nextFeatureValue) + { + continue; + } + + // compute sums1 and gain + auto sums1 = sums - sums0; + double gain = CalculateGain(sums, sums0, sums1); + + // find gain 
maximizer + if (gain > bestSplitCandidate.gain) + { + bestSplitCandidate.gain = gain; + bestSplitCandidate.splitRule = SplitRuleType{ inputIndex, 0.5 * (currentFeatureValue + nextFeatureValue) }; + bestSplitCandidate.ranges.SplitChildRange(0, rowIndex - range.firstIndex + 1); + bestSplitCandidate.stats.SetChildSums({ sums0, sums1 }); + } + } + } + return bestSplitCandidate; + } + + template + auto SortingForestTrainer::GetEdgePredictors(const NodeStats& nodeStats) -> std::vector + { + double output = nodeStats.GetTotalSums().GetMeanLabel(); + double output0 = nodeStats.GetChildSums(0).GetMeanLabel() - output; + double output1 = nodeStats.GetChildSums(1).GetMeanLabel() - output; + return std::vector{ output0, output1 }; + } + + template + void SortingForestTrainer::SortNodeDataset(Range range, size_t inputIndex) + { + _dataset.Sort([inputIndex](const data::Example& example) { return example.GetDataVector()[inputIndex]; }, + range.firstIndex, + range.size); + } + + template + double SortingForestTrainer::CalculateGain(const Sums& sums, const Sums& sums0, const Sums& sums1) const + { + if (sums0.sumWeights == 0 || sums1.sumWeights == 0) + { + return 0; + } + + return sums0.sumWeights * _lossFunction.BregmanGenerator(sums0.sumWeightedLabels / sums0.sumWeights) + + sums1.sumWeights * _lossFunction.BregmanGenerator(sums1.sumWeightedLabels / sums1.sumWeights) - + sums.sumWeights * _lossFunction.BregmanGenerator(sums.sumWeightedLabels / sums.sumWeights); + } + + template + std::unique_ptr> MakeSortingForestTrainer(const LossFunctionType& lossFunction, const BoosterType& booster, const SortingForestTrainerParameters& parameters) + { + return std::make_unique>(lossFunction, booster, parameters); + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/include/SweepingTrainer.h b/libraries/trainers/include/SweepingTrainer.h index dac3a78b5..01742581f 100644 --- a/libraries/trainers/include/SweepingTrainer.h +++ b/libraries/trainers/include/SweepingTrainer.h @@ -70,4 +70,58 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SweepingTrainer.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + template + SweepingTrainer::SweepingTrainer(std::vector&& evaluatingTrainers) : + _evaluatingTrainers(std::move(evaluatingTrainers)) + { + assert(_evaluatingTrainers.size() > 0); + } + + template + void SweepingTrainer::SetDataset(const data::AnyDataset& anyDataset) + { + _dataset = data::Dataset(anyDataset); + } + + template + void SweepingTrainer::Update() + { + for (size_t i = 0; i < _evaluatingTrainers.size(); ++i) + { + _evaluatingTrainers[i].Update(); + } + } + + template + const PredictorType& SweepingTrainer::GetPredictor() const + { + double bestGoodness = _evaluatingTrainers[0].GetEvaluator()->GetGoodness(); + size_t bestIndex = 0; + for (size_t i = 1; i < _evaluatingTrainers.size(); ++i) + { + double goodness = _evaluatingTrainers[i].GetEvaluator()->GetGoodness(); + if (goodness > bestGoodness) + { + bestGoodness = goodness; + bestIndex = i; + } + } + + return _evaluatingTrainers[bestIndex].GetPredictor(); + } + + template + std::unique_ptr> MakeSweepingTrainer(std::vector>&& evaluatingTrainers) + { + return std::make_unique>(std::move(evaluatingTrainers)); + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/include/ThresholdFinder.h b/libraries/trainers/include/ThresholdFinder.h index b31fdcd01..71b66c98a 100644 --- 
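GetBestSplitRuleAtNode above is a standard sorted scan: order the node's rows by one feature, sweep a running sum across them, and evaluate a split only between two distinct consecutive values, placing the threshold at their midpoint. The sketch below does the same with unit weights and the variance-reduction gain that CalculateGain yields when the Bregman generator is x^2; it is a toy, not the trainer's code.

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
    // (featureValue, label) rows of one node
    std::vector<std::pair<double, double>> rows =
        { { 0.3, 0 }, { 1.2, 0 }, { 1.2, 1 }, { 2.5, 1 }, { 3.1, 1 } };
    std::sort(rows.begin(), rows.end());

    double total = 0;
    for (auto& r : rows) total += r.second;

    double best = 0, bestThreshold = 0, sum0 = 0;
    for (size_t i = 0; i + 1 < rows.size(); ++i)
    {
        sum0 += rows[i].second;
        if (rows[i].first == rows[i + 1].first) continue; // same value: no split here
        double n0 = i + 1.0, n1 = rows.size() - n0, sum1 = total - sum0;
        // n0*mean0^2 + n1*mean1^2 - n*mean^2, i.e. CalculateGain with generator x^2
        double gain = sum0 * sum0 / n0 + sum1 * sum1 / n1 - total * total / rows.size();
        if (gain > best)
        {
            best = gain;
            bestThreshold = 0.5 * (rows[i].first + rows[i + 1].first);
        }
    }
    std::printf("best threshold %.2f (gain %.3f)\n", bestThreshold, best);
}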
a/libraries/trainers/include/ThresholdFinder.h +++ b/libraries/trainers/include/ThresholdFinder.h @@ -63,4 +63,70 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/ThresholdFinder.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace trainers +{ + template + ThresholdFinder::UniqueValuesResult ThresholdFinder::UniqueValues(ExampleIteratorType exampleIterator) const + { + std::vector> result; + double totalWeight = 0.0; + + // invert and densify result + while (exampleIterator.IsValid()) + { + const auto& example = exampleIterator.Get(); + const auto& denseDataVector = example.GetDataVector(); + double weight = example.GetMetadata().weak.weight; + + totalWeight += weight; + + if (result.size() < denseDataVector.PrefixLength()) + { + result.resize(denseDataVector.PrefixLength()); + } + + for (size_t j = 0; j < denseDataVector.PrefixLength(); ++j) + { + result[j].push_back({ denseDataVector[j], weight }); + } + + exampleIterator.Next(); + } + + // sort and unique each feature + for (size_t j = 0; j < result.size(); ++j) + { + auto newSize = SortReduceCopy(result[j].begin(), result[j].end()); + result[j].resize(newSize); + } + + return { result, totalWeight }; + } + + template + std::vector trainers::ExhaustiveThresholdFinder::GetThresholds(ExampleIteratorType exampleIterator) const + { + auto uniqueValuesResult = UniqueValues(exampleIterator); + std::vector thresholdPredictors; + + for (size_t j = 0; j < uniqueValuesResult.weightedValues.size(); ++j) + { + const auto& featureValues = uniqueValuesResult.weightedValues[j]; + for (size_t i = 0; i < featureValues.size() - 1; ++i) + { + thresholdPredictors.push_back({ j, 0.5 * (featureValues[i].value + featureValues[i + 1].value) }); + } + } + + return thresholdPredictors; + } +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/CMakeLists.txt b/libraries/trainers/optimization/CMakeLists.txt index 222cddd59..6fb1acb87 100644 --- a/libraries/trainers/optimization/CMakeLists.txt +++ b/libraries/trainers/optimization/CMakeLists.txt @@ -30,33 +30,10 @@ set (include include/AbsoluteLoss.h include/VectorSolution.h ) -set (tcc tcc/AbsoluteLoss.tcc - tcc/ElasticNetRegularizer.tcc - tcc/Expression.tcc - tcc/GoldenSectionMinimizer.tcc - tcc/HingeLoss.tcc - tcc/HuberLoss.tcc - tcc/IndexedContainer.tcc - tcc/L2Regularizer.tcc - tcc/LogisticLoss.tcc - tcc/MatrixExampleSet.tcc - tcc/MatrixSolution.tcc - tcc/MaxRegularizer.tcc - tcc/MultivariateLoss.tcc - tcc/OptimizationExample.tcc - tcc/SmoothedHingeLoss.tcc - tcc/SquaredHingeLoss.tcc - tcc/SquareLoss.tcc - tcc/SDCAOptimizer.tcc - tcc/SGDOptimizer.tcc - tcc/VectorSolution.tcc -) - source_group("src" FILES ${src}) source_group("include" FILES ${include}) -source_group("tcc" FILES ${tcc}) -add_library(${library_name} ${src} ${include} ${tcc} ${doc}) +add_library(${library_name} ${src} ${include} ${doc}) target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR}) target_link_libraries(${library_name} math) @@ -86,16 +63,10 @@ set (test_include test/include/LossFunction_test.h test/include/Regularizer_test.h test/include/Solution_test.h) -set (test_tcc test/tcc/LossFunction_test.tcc - test/tcc/Optimizer_test.tcc - test/tcc/RandomExampleSet.tcc - test/tcc/Solution_test.tcc) - source_group("src" FILES ${test_src}) source_group("include" FILES ${test_include}) -source_group("tcc" FILES ${test_tcc}) -add_executable(${test_name} ${test_src} ${test_include} ${test_tcc} 
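UniqueValues above leans on SortReduceCopy, which this diff does not include; presumably it sorts one feature's (value, weight) pairs and collapses duplicate values while accumulating their weights, after which GetThresholds emits midpoints between the survivors. A sketch under that assumption:

#include <algorithm>
#include <cstdio>
#include <vector>

struct ValueWeight { double value; double weight; };

int main()
{
    std::vector<ValueWeight> v = { { 2.0, 1 }, { 1.0, 1 }, { 2.0, 3 }, { 4.0, 1 } };
    std::sort(v.begin(), v.end(), [](auto a, auto b) { return a.value < b.value; });

    // in-place reduce: merge runs of equal values, summing their weights
    size_t out = 0;
    for (size_t i = 0; i < v.size(); ++i)
    {
        if (out > 0 && v[out - 1].value == v[i].value)
            v[out - 1].weight += v[i].weight;
        else
            v[out++] = v[i];
    }
    v.resize(out);

    // candidate thresholds halfway between distinct surviving values
    for (size_t i = 0; i + 1 < v.size(); ++i)
        std::printf("threshold %.2f\n", 0.5 * (v[i].value + v[i + 1].value));
}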
${include}) +add_executable(${test_name} ${test_src} ${test_include} ${include}) target_include_directories(${test_name} PRIVATE test/include ${ELL_LIBRARIES_DIR}) target_link_libraries(${test_name} optimization testing) copy_shared_libraries(${test_name}) diff --git a/libraries/trainers/optimization/include/AbsoluteLoss.h b/libraries/trainers/optimization/include/AbsoluteLoss.h index bdba04e11..0e05fd6f6 100644 --- a/libraries/trainers/optimization/include/AbsoluteLoss.h +++ b/libraries/trainers/optimization/include/AbsoluteLoss.h @@ -69,4 +69,63 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/AbsoluteLoss.tcc" +#pragma region implementation + +#include + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + double AbsoluteLoss::Value(double prediction, OutputType output) + { + return std::abs(prediction - output); + } + + template + double AbsoluteLoss::Derivative(double prediction, OutputType output) + { + if (prediction == output) + { + return 0.0; + } + if (prediction < output) + { + return -1.0; + } + return 1.0; + } + + template + double AbsoluteLoss::Conjugate(double v, OutputType output) + { + if (-1.0 <= v && v <= 1.0) + { + return output * v; + } + return std::numeric_limits::infinity(); + } + + template + double AbsoluteLoss::ConjugateProx(double theta, double z, OutputType output) + { + double a = z - theta * output; + + if (a <= -1.0) + { + return -1.0; + } + if (a >= 1.0) + { + return 1.0; + } + return a; + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/ElasticNetRegularizer.h b/libraries/trainers/optimization/include/ElasticNetRegularizer.h index 281a14b21..83cfadb64 100644 --- a/libraries/trainers/optimization/include/ElasticNetRegularizer.h +++ b/libraries/trainers/optimization/include/ElasticNetRegularizer.h @@ -52,4 +52,39 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/ElasticNetRegularizer.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + double ElasticNetRegularizer::Value(const SolutionType& w) const + { + return 0.5 * Norm2Squared(w) + _beta * w.GetVector().Norm1(); // note: Norm1 does not include the bias term + } + + template + double ElasticNetRegularizer::Conjugate(const SolutionType& v) const + { + SolutionType w = v; + L1Prox(w.GetVector(), _beta); // note: L1 term does not apply to the bias term + double result = -_beta * w.GetVector().Norm1(); + w -= v; + result += 0.5 * (Norm2Squared(v) - Norm2Squared(w)); + return result; + } + + template + void ElasticNetRegularizer::ConjugateGradient(const SolutionType& v, SolutionType& w) const + { + w = v; + L1Prox(w.GetVector(), _beta); // note: L1Prox does not apply to the bias term + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/Expression.h b/libraries/trainers/optimization/include/Expression.h index 36568253d..8ac672a90 100644 --- a/libraries/trainers/optimization/include/Expression.h +++ b/libraries/trainers/optimization/include/Expression.h @@ -95,4 +95,47 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/Expression.tcc" \ No newline at end of file +#pragma region implementation + +#pragma once + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + 
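The ElasticNetRegularizer hunk above calls L1Prox, which is also not part of this diff; the standard choice for that operator is coordinate-wise soft-thresholding, sketched here under that assumption (the callers exclude the bias term, as their comments note).

#include <cmath>
#include <cstdio>
#include <vector>

// assumed shape of L1Prox: shrink each entry toward zero by beta,
// clamping entries within [-beta, beta] to exactly zero
void L1Prox(std::vector<double>& v, double beta)
{
    for (auto& x : v)
    {
        x = (std::abs(x) <= beta) ? 0.0 : x - std::copysign(beta, x);
    }
}

int main()
{
    std::vector<double> v = { 0.05, -0.3, 1.2 };
    L1Prox(v, 0.1);
    for (double x : v) std::printf("%.2f ", x); // prints: 0.00 -0.20 1.10
}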
Expression MakeExpression(const LeftType& lhs, const RightType& rhs) + { + return Expression(lhs, rhs); + } + + template + ScaledColumnVectorExpression operator*(math::ConstColumnVectorReference vectorReference, double scalar) + { + return MakeExpression(vectorReference, scalar); + } + + template + OuterProductExpression operator*(math::ConstColumnVectorReference columnVectorReference, math::ConstRowVectorReference rowVectorReference) + { + return MakeExpression(columnVectorReference, rowVectorReference); + } + + template Concept> + ScaledExpression operator*(const T& scalable, double scalar) + { + return MakeExpression(std::ref(scalable), scalar); + } + + template Concept1, IsSummable Concept2> + SumExpression operator+(T1 summable1, T2 summable2) + { + return MakeExpression(summable1, summable2); + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/GoldenSectionMinimizer.h b/libraries/trainers/optimization/include/GoldenSectionMinimizer.h index 1d3056b1b..5298900e5 100644 --- a/libraries/trainers/optimization/include/GoldenSectionMinimizer.h +++ b/libraries/trainers/optimization/include/GoldenSectionMinimizer.h @@ -70,4 +70,101 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/GoldenSectionMinimizer.tcc" +#pragma region implementation + +#pragma once + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + GoldenSectionMinimizer::GoldenSectionMinimizer(FunctionType function, double lower, double upper) : + _boundary1(lower), + _boundary2(upper), + _function(std::move(function)) + { + _minPoint = _goldenComplement * _boundary1 + _golden * _boundary2; + _minPointValue = _function(_minPoint); + _boundary1Value = _function(_boundary1); + _boundary2Value = _function(_boundary2); + } + + template + void GoldenSectionMinimizer::Step(size_t iterations) + { + for (size_t i = 0; i < iterations; ++i) + { + Step(); + } + } + template + void GoldenSectionMinimizer::MinimizeToPrecision(double precision) + { + do + { + Step(); + } while (GetPrecision() > precision); + } + template + void GoldenSectionMinimizer::Step() + { + double newPoint = _goldenComplement * _boundary1 + _golden * _minPoint; + double newPointValue = _function(newPoint); + if (newPointValue < _minPointValue) + { + _boundary2 = _minPoint; + _boundary2Value = _minPointValue; + _minPoint = newPoint; + _minPointValue = newPointValue; + } + else + { + _boundary1 = _boundary2; + _boundary1Value = _boundary2Value; + _boundary2 = newPoint; + _boundary2Value = newPointValue; + } + } + template + double GoldenSectionMinimizer::GetArgMinLowerBound() const + { + return std::min(_boundary1, _boundary2); + } + template + double GoldenSectionMinimizer::GetArgMinUpperBound() const + { + return std::max(_boundary1, _boundary2); + } + template + double GoldenSectionMinimizer::GetApproximateArgMin() const + { + return 0.5 * (_boundary1 + _boundary2); + } + + template + double GoldenSectionMinimizer::GetMinUpperBound() const + { + return _minPointValue; + } + + template + double GoldenSectionMinimizer::GetMinLowerBound() const + { + double min1 = _boundary1Value * (1.0 - 1.0 / _golden) + _minPointValue / _golden; + double min2 = _boundary2Value * (1.0 - 1.0 / _goldenComplement) + _minPointValue / _goldenComplement; + return std::min(min1, min2); + } + + template + double GoldenSectionMinimizer::GetPrecision() const + { + return GetMinUpperBound() - GetMinLowerBound(); + } + } // 
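The minimizer above caches boundary function values so it can report certified upper and lower bounds on the minimum; stripped of that bookkeeping, the underlying golden-section loop looks like this (the constant and the test function are illustrative, and the class above avoids the repeated function evaluations this toy performs):

#include <cstdio>

int main()
{
    const double golden = 0.6180339887; // 1/phi
    double lo = -2.0, hi = 4.0;         // initial bracket
    auto f = [](double x) { return (x - 1.0) * (x - 1.0); };

    double a = hi - golden * (hi - lo);
    double b = lo + golden * (hi - lo);
    while (hi - lo > 1e-6)
    {
        // keep the subinterval containing the smaller probe value
        if (f(a) < f(b)) { hi = b; b = a; a = hi - golden * (hi - lo); }
        else             { lo = a; a = b; b = lo + golden * (hi - lo); }
    }
    std::printf("argmin ~ %.6f\n", 0.5 * (lo + hi)); // ~1.0
}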
namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/HingeLoss.h b/libraries/trainers/optimization/include/HingeLoss.h index fdd1740b9..f0c33610c 100644 --- a/libraries/trainers/optimization/include/HingeLoss.h +++ b/libraries/trainers/optimization/include/HingeLoss.h @@ -68,4 +68,85 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/HingeLoss.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template <typename OutputType> + bool HingeLoss::VerifyOutput(OutputType output) + { + if (output == 1.0 || output == -1.0) + { + return true; + } + return false; + } + + template <typename OutputType> + double HingeLoss::Value(double prediction, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Hinge Loss requires binary outputs"); + + double margin = prediction * output; + + if (margin >= 1.0) + { + return 0.0; + } + return 1.0 - margin; + } + + template <typename OutputType> + double HingeLoss::Derivative(double prediction, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Hinge Loss requires binary outputs"); + + double margin = prediction * output; + + if (margin >= 1.0) + { + return 0.0; + } + return -output; + } + + template <typename OutputType> + double HingeLoss::Conjugate(double v, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Hinge Loss requires binary outputs"); + + double a = output * v; + + if (-1.0 <= a && a <= 0.0) + { + return a; + } + return std::numeric_limits<double>::infinity(); + } + + template <typename OutputType> + double HingeLoss::ConjugateProx(double theta, double z, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Hinge Loss requires binary outputs"); + + double a = output * z; + + if (a < theta - 1.0) + { + return -output; + } + if (a <= theta) + { + return z - theta * output; + } + return 0.0; + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/HuberLoss.h b/libraries/trainers/optimization/include/HuberLoss.h index ce33177af..541ab58f5 100644 --- a/libraries/trainers/optimization/include/HuberLoss.h +++ b/libraries/trainers/optimization/include/HuberLoss.h @@ -78,4 +78,69 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/HuberLoss.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template <typename OutputType> + double HuberLoss::Value(double prediction, OutputType output) const + { + double residual = prediction - output; + + if (residual >= -_gamma && residual <= _gamma) + { + return 0.5 / _gamma * residual * residual; + } + return std::abs(residual) - 0.5 * _gamma; + } + + template <typename OutputType> + double HuberLoss::Derivative(double prediction, OutputType output) const + { + double residual = prediction - output; + + if (residual >= -_gamma && residual <= _gamma) + { + return residual / _gamma; + } + if (residual > 0) + { + return 1.0; + } + return -1.0; + } + + template <typename OutputType> + double HuberLoss::Conjugate(double v, OutputType output) const + { + if (-1.0 <= v && v <= 1.0) + { + return output * v + 0.5 * _gamma * v * v; + } + return std::numeric_limits<double>::infinity(); + } + + template <typename OutputType> + double HuberLoss::ConjugateProx(double theta, double z, OutputType output) const + { + double a = (z - theta * output) / (1 + theta * _gamma); + + if (a <= -1.0) + { + return -1.0; + } + if (a >= 1.0) + { + return 1.0; + } + return a; + } + } // namespace optimization +} // namespace trainers +} //
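A cheap sanity check for any of the Value/Conjugate pairs in these loss hunks is the Fenchel-Young inequality: Value(p, y) + Conjugate(v, y) >= p * v for every prediction p and every dual v in the conjugate's domain, with equality at the optimal dual. A tiny sweep for the hinge loss, as a free-standing reimplementation rather than the ELL classes:

#include <algorithm>
#include <cstdio>

double hingeValue(double p, double y) { return std::max(0.0, 1.0 - p * y); }
double hingeConjugate(double v, double y)
{
    double a = y * v;
    return (-1.0 <= a && a <= 0.0) ? a : 1e300; // +infinity outside the domain
}

int main()
{
    double y = 1.0;
    bool ok = true;
    for (double p = -2; p <= 2; p += 0.25)
        for (double v = -1; v <= 0; v += 0.25) // stay in the conjugate's domain
            ok = ok && (hingeValue(p, y) + hingeConjugate(v, y) >= p * v - 1e-12);
    std::printf("Fenchel-Young holds: %s\n", ok ? "yes" : "no");
}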
namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/IndexedContainer.h b/libraries/trainers/optimization/include/IndexedContainer.h index 4636567bb..35300ec25 100644 --- a/libraries/trainers/optimization/include/IndexedContainer.h +++ b/libraries/trainers/optimization/include/IndexedContainer.h @@ -47,4 +47,23 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/IndexedContainer.tcc" \ No newline at end of file +#pragma region implementation + +#pragma once + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + IndexedContainerElementType VectorIndexedContainer::Get(size_t index) const + { + return IndexedContainerElementType((*this)[index]); + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/L2Regularizer.h b/libraries/trainers/optimization/include/L2Regularizer.h index f9b84abb2..9ebda8814 100644 --- a/libraries/trainers/optimization/include/L2Regularizer.h +++ b/libraries/trainers/optimization/include/L2Regularizer.h @@ -42,4 +42,33 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/L2Regularizer.tcc" \ No newline at end of file +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + double L2Regularizer::Value(const SolutionType& w) + { + return 0.5 * Norm2Squared(w); + } + + template + double L2Regularizer::Conjugate(const SolutionType& v) + { + return 0.5 * Norm2Squared(v); + } + + template + void L2Regularizer::ConjugateGradient(const SolutionType& v, SolutionType& w) + { + w = v; + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/LogisticLoss.h b/libraries/trainers/optimization/include/LogisticLoss.h index 01e8620cb..49f0a3fac 100644 --- a/libraries/trainers/optimization/include/LogisticLoss.h +++ b/libraries/trainers/optimization/include/LogisticLoss.h @@ -70,4 +70,101 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/LogisticLoss.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + bool LogisticLoss::VerifyOutput(OutputType output) + { + if (output == 1.0 || output == -1.0) + { + return true; + } + return false; + } + + template + double LogisticLoss::Value(double prediction, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Logistic Loss requires binary outputs"); + + const double exponentLimit = 18.0; + + double margin = prediction * output; + + if (margin <= -exponentLimit) + { + return -margin; + } + return std::log1p(std::exp(-margin)); + } + + template + double LogisticLoss::Derivative(double prediction, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Logistic Loss requires binary outputs"); + + double margin = static_cast(prediction * output); + + if (margin <= 0.0) + { + return -output / (1 + std::exp(margin)); + } + auto expNegMargin = std::exp(-margin); + return -output * expNegMargin / (1 + expNegMargin); + } + + template + double LogisticLoss::Conjugate(double v, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Logistic Loss requires binary outputs"); + + const double conjugateBoundary = 1.0e-12; + double a = output * v; + + if (a < -1.0 || a > 0.0) + { + return std::numeric_limits::infinity(); + } + if (a 
<= conjugateBoundary - 1.0 || -conjugateBoundary <= a) + { + return 0.0; + } + return (1.0 + a) * std::log1p(a) + (-a) * std::log(-a); + } + + template + double LogisticLoss::ConjugateProx(double theta, double z, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Logistic Loss requires binary outputs"); + + const double conjugateBoundary = 1.0e-12; + const double conjugateProxDesiredDualityGap = 1.0e-6; + const size_t conjugateProxMaxIterations = 20; + + double lowerBound = conjugateBoundary - 1.0; + double upperBound = -conjugateBoundary; + + double a = output * z; + double b = std::min(upperBound, std::max(lowerBound, a)); + double f = 0, df = 0; + for (size_t k = 0; k < conjugateProxMaxIterations; ++k) + { + f = b - a + theta * log((1.0 + b) / (-b)); + if (std::abs(f) <= conjugateProxDesiredDualityGap) break; + df = 1.0 - theta / (b * (1.0 + b)); + b -= f / df; + b = std::min(upperBound, std::max(lowerBound, b)); + } + return b * output; + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/MatrixExampleSet.h b/libraries/trainers/optimization/include/MatrixExampleSet.h index 0fff38bd9..2a0ba756d 100644 --- a/libraries/trainers/optimization/include/MatrixExampleSet.h +++ b/libraries/trainers/optimization/include/MatrixExampleSet.h @@ -43,4 +43,32 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/MatrixExampleSet.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + MatrixExampleSet::MatrixExampleSet(math::RowMatrix input, math::RowMatrix output) : + _input(std::move(input)), + _output(std::move(output)) + { + if (_input.NumRows() != _output.NumRows()) + { + throw OptimizationException("Number of inputs and outputs don't match"); + } + } + + template + auto MatrixExampleSet::Get(size_t index) const -> ExampleType + { + return ExampleType(_input.GetRow(index), _output.GetRow(index)); + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/MatrixSolution.h b/libraries/trainers/optimization/include/MatrixSolution.h index 8395f6a2a..92e9fd7f8 100644 --- a/libraries/trainers/optimization/include/MatrixSolution.h +++ b/libraries/trainers/optimization/include/MatrixSolution.h @@ -105,4 +105,193 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/MatrixSolution.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + void MatrixSolution::Resize(const InputType& inputExample, const OutputType& outputExample) + { + math::ColumnMatrix matrix(inputExample.Size(), outputExample.Size()); + _weights.Swap(matrix); + + if constexpr (!isDouble) + { + _doubleInput.Resize(inputExample.Size()); + } + + if constexpr (isBiased) + { + _bias.Resize(outputExample.Size()); + } + } + + template + void MatrixSolution::operator=(const MatrixSolution& other) + { + _weights.CopyFrom(other._weights); + + if constexpr (isBiased) + { + _bias.CopyFrom(other._bias); + } + } + + template + void MatrixSolution::operator=(SumExpression>, ScaledExpression>> expression) + { + const auto& thisTerm = expression.lhs; + const auto& otherTerm = expression.rhs; + + if (&(thisTerm.lhs.get()) != this) + { + throw OptimizationException("First term should be a scaled version of this solution"); + } + + double 
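Unlike the other losses here, the logistic loss has no closed-form conjugate prox, which is why ConjugateProx above runs a clamped Newton iteration on f(b) = b - a + theta * log((1 + b) / (-b)) over b in (-1, 0). The same iteration as a free-standing function, with the constants mirrored from the code above:

#include <algorithm>
#include <cmath>
#include <cstdio>

double logisticConjugateProx(double theta, double z, double output)
{
    const double boundary = 1.0e-12;        // keep b strictly inside (-1, 0)
    double lower = boundary - 1.0, upper = -boundary;
    double a = output * z;
    double b = std::min(upper, std::max(lower, a)); // clamped starting point
    for (int k = 0; k < 20; ++k)
    {
        double f = b - a + theta * std::log((1.0 + b) / (-b));
        if (std::abs(f) <= 1.0e-6) break;
        double df = 1.0 - theta / (b * (1.0 + b)); // note b * (1 + b) < 0 here
        b = std::min(upper, std::max(lower, b - f / df)); // safeguarded Newton step
    }
    return b * output;
}

int main()
{
    std::printf("%.6f\n", logisticConjugateProx(0.5, -0.3, 1.0));
}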
thisScale = thisTerm.rhs; + const auto& otherSolution = otherTerm.lhs.get(); + double otherScale = otherTerm.rhs; + math::ScaleAddUpdate(otherScale, otherSolution._weights, thisScale, _weights); + + if constexpr (isBiased) + { + math::ScaleAddUpdate(otherScale, otherSolution.GetBias(), thisScale, _bias); + } + } + + template + void MatrixSolution::operator=(SumExpression>, OuterProductExpression> expression) + { + const auto& thisTerm = expression.lhs; + const auto& updateTerm = expression.rhs; + + if (&(thisTerm.lhs.get()) != this) + { + throw OptimizationException("The first term should be a scaled version of this solution"); + } + + double thisScale = thisTerm.rhs; + const auto& columnVectorReference = updateTerm.lhs; + const auto& rowVectorReference = updateTerm.rhs; + _weights *= thisScale; + + if constexpr (isDouble) + { + math::RankOneUpdate(1.0, columnVectorReference, rowVectorReference, _weights); + } + else + { + auto doubleColumnVector = _doubleInput.Transpose(); + doubleColumnVector.CopyFrom(columnVectorReference); + math::RankOneUpdate(1.0, doubleColumnVector, rowVectorReference, _weights); + } + + if constexpr (isBiased) + { + math::ScaleAddUpdate(1.0, rowVectorReference, thisScale, _bias); + } + } + + template + void MatrixSolution::operator-=(const MatrixSolution& other) + { + _weights -= other._weights; + if constexpr (isBiased) + { + _bias -= other._bias; + } + } + + template + void MatrixSolution::operator+=(OuterProductExpression expression) + { + const auto& columnVectorReference = expression.lhs; + const auto& rowVectorReference = expression.rhs; + + if constexpr (isDouble) + { + math::RankOneUpdate(1.0, columnVectorReference, rowVectorReference, _weights); + } + else + { + auto doubleColumnVector = _doubleInput.Transpose(); + doubleColumnVector.CopyFrom(columnVectorReference); + math::RankOneUpdate(1.0, doubleColumnVector, rowVectorReference, _weights); + } + + if constexpr (isBiased) + { + math::ScaleAddUpdate(1.0, rowVectorReference, 1.0, _bias); + } + } + + template + math::RowVector MatrixSolution::Multiply(const InputType& input) const + { + math::RowVector result(_weights.NumColumns()); + + if constexpr (isBiased) + { + result.CopyFrom(_bias); + } + + if constexpr (isDouble) + { + math::MultiplyScaleAddUpdate(1.0, input, _weights, 1.0, result); + } + else + { + _doubleInput.CopyFrom(input); + math::MultiplyScaleAddUpdate(1.0, _doubleInput, _weights, 1.0, result); + } + + return result; + } + + template + double MatrixSolution::GetNorm2SquaredOf(const InputType& input) + { + double result = input.Norm2Squared(); + + if constexpr (isBiased) + { + result += 1.0; + } + + return result; + } + + template + void MatrixSolution::InitializeAuxiliaryVariable(AuxiliaryDoubleType& aux) + { + aux.Resize(_weights.NumColumns()); + aux.Reset(); + } + + template + double Norm2Squared(const MatrixSolution& solution) + { + double result = solution.GetMatrix().ReferenceAsVector().Norm2Squared(); + + if constexpr (isBiased) + { + result += solution.GetBias().Norm2Squared(); + } + + return result; + } + + template + math::RowVector operator*(math::ConstRowVectorReference input, const MatrixSolution& solution) + { + return solution.Multiply(input); + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/MaxRegularizer.h b/libraries/trainers/optimization/include/MaxRegularizer.h index 942fd3547..54592e94c 100644 --- a/libraries/trainers/optimization/include/MaxRegularizer.h 
+++ b/libraries/trainers/optimization/include/MaxRegularizer.h @@ -53,4 +53,39 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/MaxRegularizer.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + double MaxRegularizer::Value(const SolutionType& w) const + { + return 0.5 * Norm2Squared(w) + _beta * w.GetVector().NormInfinity(); // note: NormInfinity does not include the bias term + } + + template + double MaxRegularizer::Conjugate(const SolutionType& v) const + { + SolutionType w = v; + LInfinityProx(w.GetVector(), _scratch, _beta); // note: LInfinity term does not apply to the bias term + double result = -_beta * w.GetVector().NormInfinity(); + w -= v; + result += 0.5 * (Norm2Squared(v) - Norm2Squared(w)); + return result; + } + + template + void MaxRegularizer::ConjugateGradient(const SolutionType& v, SolutionType& w) const + { + w = v; + LInfinityProx(w.GetVector(), _scratch, _beta); // note: LInfinityProx does not apply to the bias term + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/MultivariateLoss.h b/libraries/trainers/optimization/include/MultivariateLoss.h index afaef7122..633c147ff 100644 --- a/libraries/trainers/optimization/include/MultivariateLoss.h +++ b/libraries/trainers/optimization/include/MultivariateLoss.h @@ -78,4 +78,82 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/MultivariateLoss.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + MultivariateLoss::MultivariateLoss(LossType univariateLoss) : + _univariateLoss(std::move(univariateLoss)) + {} + + template + template + bool MultivariateLoss::VerifyOutput(math::ConstRowVectorReference output) const + { + for (size_t i = 0; i < output.Size(); ++i) + { + if (!_univariateLoss.VerifyOutput(output[i])) + { + return false; + } + } + return true; + } + + template + template + double MultivariateLoss::Value(math::ConstRowVectorReference prediction, math::ConstRowVectorReference output) const + { + double result = 0; + for (size_t i = 0; i < prediction.Size(); ++i) + { + result += _univariateLoss.Value(prediction[i], output[i]); + } + return result; + } + + template + template + math::RowVector MultivariateLoss::Derivative(math::ConstRowVectorReference prediction, math::ConstRowVectorReference output) const + { + math::RowVector result(prediction.Size()); + for (size_t i = 0; i < prediction.Size(); ++i) + { + result[i] = _univariateLoss.Derivative(prediction[i], output[i]); + } + return result; + } + + template + template + double MultivariateLoss::Conjugate(math::ConstRowVectorReference dual, math::ConstRowVectorReference output) const + { + double result = 0; + for (size_t i = 0; i < dual.Size(); ++i) + { + result += _univariateLoss.Conjugate(dual[i], output[i]); + } + return result; + } + + template + template + math::RowVector MultivariateLoss::ConjugateProx(double sigma, math::ConstRowVectorReference prediction, math::ConstRowVectorReference output) const + { + math::RowVector result(prediction.Size()); + for (size_t i = 0; i < prediction.Size(); ++i) + { + result[i] = _univariateLoss.ConjugateProx(sigma, prediction[i], output[i]); + } + return result; + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git 
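The MultivariateLoss hunk above is purely a coordinate-wise adapter: every operation loops over the vector, delegates to the scalar loss, and sums or collects the results. Reduced to its essence with the square loss:

#include <cstdio>
#include <vector>

int main()
{
    std::vector<double> prediction = { 0.9, -0.2, 0.4 };
    std::vector<double> output = { 1.0, 0.0, 0.5 };

    double value = 0;
    for (size_t i = 0; i < prediction.size(); ++i)
    {
        double r = prediction[i] - output[i];
        value += 0.5 * r * r; // univariate SquareLoss::Value, per coordinate
    }
    std::printf("multivariate loss = %.4f\n", value); // 0.0300
}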
a/libraries/trainers/optimization/include/OptimizationExample.h b/libraries/trainers/optimization/include/OptimizationExample.h index ff6932682..b7e0ed2e2 100644 --- a/libraries/trainers/optimization/include/OptimizationExample.h +++ b/libraries/trainers/optimization/include/OptimizationExample.h @@ -47,4 +47,34 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/OptimizationExample.tcc" \ No newline at end of file +#pragma region implementation + +#pragma once + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + Example::Example(InputType input, OutputType output, double weight) : + input(std::move(input)), + output(std::move(output)), + weight(weight) + { + } + + template + template + Example::Example(const Example& other) : + input(other.input), + output(other.output), + weight(other.weight) + { + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/SDCAOptimizer.h b/libraries/trainers/optimization/include/SDCAOptimizer.h index a01aee3ff..86d967d0a 100644 --- a/libraries/trainers/optimization/include/SDCAOptimizer.h +++ b/libraries/trainers/optimization/include/SDCAOptimizer.h @@ -111,4 +111,154 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SDCAOptimizer.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + SDCAOptimizer::SDCAOptimizer(std::shared_ptr examples, LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters) : + _examples(examples), + _lossFunction(lossFunction), + _regularizer(regularizer) + { + if (!examples || examples->Size() == 0) + { + throw OptimizationException("Empty dataset"); + } + + // set parameters + _lambda = parameters.regularization; + _desiredDualityGap = parameters.desiredDualityGap; + _permuteData = parameters.permuteData; + + size_t numExamples = examples->Size(); + _normalizedInverseLambda = 1.0 / (numExamples * parameters.regularization); + + // set up random engine + std::seed_seq seed(parameters.randomSeedString.begin(), parameters.randomSeedString.end()); + _randomEngine.seed(seed); + + // resize data structures according to examples + auto firstExample = examples->Get(0); + _w.Resize(firstExample.input, firstExample.output); + _v.Resize(firstExample.input, firstExample.output); + _exampleInfo.resize(numExamples); + + // initialize the per-example info, check that outputs are compatible with the loss, and compute primal objective + double primalSum = 0; + for (size_t i = 0; i < numExamples; ++i) + { + auto example = examples->Get(i); + + if (!_lossFunction.VerifyOutput(example.output)) + { + throw OptimizationException("Discovered an output that is incompatible with the chosen loss function"); + } + + // cache the norm of the example + double norm2Squared = _w.GetNorm2SquaredOf(example.input); + _exampleInfo[i].norm2Squared = norm2Squared; + + // initialize the dual + _w.InitializeAuxiliaryVariable(_exampleInfo[i].dual); + + // compute the primal objective + auto prediction = example.input * _w; + primalSum += _lossFunction.Value(prediction, example.output); + } + + _solutionInfo.primalObjective = primalSum / numExamples + _lambda * _regularizer.Value(_w); + } + + template + void SDCAOptimizer::PerformEpochs(size_t count) + { + std::vector permutation(_examples->Size()); + std::iota(permutation.begin(), permutation.end(), 0); + + // epochs 
+ for (size_t e = 0; e < count; ++e) + { + // early exit + if (_solutionInfo.DualityGap() <= _desiredDualityGap) + { + break; + } + + // generate random permutation + if (_permuteData) + { + std::shuffle(permutation.begin(), permutation.end(), _randomEngine); + } + + // process each example + for (size_t index : permutation) + { + Step(_examples->Get(index), _exampleInfo[index]); + } + + _solutionInfo.numEpochsPerformed++; + ComputeObjectives(); + } + } + + template + void SDCAOptimizer::Step(ExampleType example, ExampleInfo& exampleInfo) + { + const double tolerance = 1.0e-8; + + auto& dual = exampleInfo.dual; + + auto lipschitz = exampleInfo.norm2Squared * _normalizedInverseLambda; + if (lipschitz < tolerance) + { + return; + } + + auto prediction = example.input * _w; + prediction /= lipschitz; + prediction += dual; + + auto newDual = _lossFunction.ConjugateProx(1.0 / lipschitz, prediction, example.output); + dual -= newDual; + dual *= _normalizedInverseLambda; + + _v += Transpose(example.input) * dual; + _regularizer.ConjugateGradient(_v, _w); + exampleInfo.dual = newDual; + } + + template + void SDCAOptimizer::ComputeObjectives() + { + double primalSum = 0; + double dualSum = 0; + + for (size_t i = 0; i < _examples->Size(); ++i) + { + auto example = _examples->Get(i); + + auto prediction = example.input * _w; + primalSum += _lossFunction.Value(prediction, example.output); + + dualSum += _lossFunction.Conjugate(_exampleInfo[i].dual, example.output); + } + + _solutionInfo.primalObjective = primalSum / _examples->Size() + _lambda * _regularizer.Value(_w); + _solutionInfo.dualObjective = -dualSum / _examples->Size() - _lambda * _regularizer.Conjugate(_v); + } + + template + SDCAOptimizer MakeSDCAOptimizer(std::shared_ptr examples, LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters) + { + return SDCAOptimizer(examples, lossFunction, regularizer, parameters); + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/SGDOptimizer.h b/libraries/trainers/optimization/include/SGDOptimizer.h index 6dccccc88..c3c1a9ff6 100644 --- a/libraries/trainers/optimization/include/SGDOptimizer.h +++ b/libraries/trainers/optimization/include/SGDOptimizer.h @@ -75,4 +75,100 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SGDOptimizer.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + /// + template + SGDOptimizer::SGDOptimizer(std::shared_ptr examples, LossFunctionType lossFunction, SGDOptimizerParameters parameters) : + _examples(examples), + _lossFunction(std::move(lossFunction)), + _lambda(parameters.regularization) + { + if (!examples || examples->Size() == 0) + { + throw OptimizationException("Empty dataset"); + } + + // check that all the outputs are compatible with the loss + for (size_t i = 0; i < examples->Size(); ++i) + { + auto example = examples->Get(i); + + if (!_lossFunction.VerifyOutput(example.output)) + { + throw OptimizationException("Discovered an output that is incompatible with the chosen loss function"); + } + } + + // setup random engine + std::seed_seq seed(parameters.randomSeedString.begin(), parameters.randomSeedString.end()); + _randomEngine.seed(seed); + + auto example = examples->Get(0); + _lastW.Resize(example.input, example.output); + _averagedW.Resize(example.input, example.output); + } + + template + void 
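PerformEpochs above wraps the coordinate steps in a convergence test: after each full pass it recomputes both objectives and stops as soon as primal minus dual falls below desiredDualityGap, which is a certified bound on suboptimality. The control flow, schematically, with a fake optimizer state standing in for the real coordinate steps:

#include <cstdio>

struct FakeState { double primal = 1.0, dual = 0.0; }; // stand-in, not ELL types

int main()
{
    const double desiredDualityGap = 1e-3;
    FakeState s;
    for (int epoch = 0; epoch < 50; ++epoch)
    {
        if (s.primal - s.dual <= desiredDualityGap) break; // early exit, as above
        // one permuted sweep of coordinate steps would go here; fake the progress
        s.dual += 0.5 * (s.primal - s.dual);
        std::printf("epoch %d gap %.5f\n", epoch, s.primal - s.dual);
    }
}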
SGDOptimizer::PerformEpochs(size_t count) + { + if (_examples == nullptr) + { + throw OptimizationException("Call SetExamples before calling Epoch"); + } + + std::vector permutation(_examples->Size()); + std::iota(permutation.begin(), permutation.end(), 0); + + // epochs + for (size_t e = 0; e < count; ++e) + { + // generate random permutation + std::shuffle(permutation.begin(), permutation.end(), _randomEngine); + + // process each example + for (size_t index : permutation) + { + Step(_examples->Get(index)); + } + } + } + + template + void SGDOptimizer::Step(ExampleType example) + { + const auto& x = example.input; + const auto& y = example.output; + double weight = example.weight; + + ++_t; + + // predict + auto p = x * _lastW; + + // calculate the loss derivative + auto derivative = _lossFunction.Derivative(p, y); + derivative *= -weight / (_lambda * _t); + + // update the solution + double inverseT = 1.0 / _t; + _lastW = _lastW * (1.0 - inverseT) + Transpose(x) * derivative; + _averagedW = _averagedW * (1.0 - inverseT) + _lastW * inverseT; + } + + template + SGDOptimizer MakeSGDOptimizer(std::shared_ptr examples, LossFunctionType lossFunction, SGDOptimizerParameters parameters) + { + return SGDOptimizer(examples, lossFunction, parameters); + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/SmoothedHingeLoss.h b/libraries/trainers/optimization/include/SmoothedHingeLoss.h index e891e4cf2..f61986d52 100644 --- a/libraries/trainers/optimization/include/SmoothedHingeLoss.h +++ b/libraries/trainers/optimization/include/SmoothedHingeLoss.h @@ -77,4 +77,92 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SmoothedHingeLoss.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + bool SmoothedHingeLoss::VerifyOutput(OutputType output) + { + if (output == 1.0 || output == -1.0) + { + return true; + } + return false; + } + + template + double SmoothedHingeLoss::Value(double prediction, OutputType output) const + { + DEBUG_CHECK(VerifyOutput(output), "Smoothed Hinge Loss requires binary outputs"); + + double margin = prediction * output; + if (margin >= 1.0) + { + return 0.0; + } + if (margin >= 1.0 - _gamma) + { + double residual = (prediction - output); + return 0.5 / _gamma * residual * residual; + } + return 1.0 - margin - 0.5 * _gamma; + } + + template + double SmoothedHingeLoss::Derivative(double prediction, OutputType output) const + { + DEBUG_CHECK(VerifyOutput(output), "Smoothed Hinge Loss requires binary outputs"); + + double margin = prediction * output; + if (margin >= 1.0) + { + return 0.0; + } + if (margin >= 1.0 - _gamma) + { + return (prediction - output) / _gamma; + } + return -output; + } + + template + double SmoothedHingeLoss::Conjugate(double v, OutputType output) const + { + DEBUG_CHECK(VerifyOutput(output), "Smoothed Hinge Loss requires binary outputs"); + + double a = output * v; + + if (-1.0 <= a && a <= 0.0) + { + return a + 0.5 * _gamma * v * v; + } + return std::numeric_limits::infinity(); + } + + template + double SmoothedHingeLoss::ConjugateProx(double theta, double z, OutputType output) const + { + DEBUG_CHECK(VerifyOutput(output), "Smoothed Hinge Loss requires binary outputs"); + + double a = output * z; + + if (a < (1.0 - _gamma) * theta - 1.0) + { + return -output; + } + if (a <= theta) + { + return (z - theta * output) / (1 + theta * _gamma); + } + 
return 0.0; + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/SquareLoss.h b/libraries/trainers/optimization/include/SquareLoss.h index 1591f2f3e..f6dae4975 100644 --- a/libraries/trainers/optimization/include/SquareLoss.h +++ b/libraries/trainers/optimization/include/SquareLoss.h @@ -69,4 +69,40 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SquareLoss.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template <typename OutputType> + double SquareLoss::Value(double prediction, OutputType output) + { + double residual = prediction - output; + return 0.5 * residual * residual; + } + + template <typename OutputType> + double SquareLoss::Derivative(double prediction, OutputType output) + { + return prediction - output; + } + + template <typename OutputType> + double SquareLoss::Conjugate(double v, OutputType output) + { + return (0.5 * v + output) * v; + } + + template <typename OutputType> + double SquareLoss::ConjugateProx(double theta, double z, OutputType output) + { + return (z - theta * output) / (1 + theta); + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/SquaredHingeLoss.h b/libraries/trainers/optimization/include/SquaredHingeLoss.h index f81bf384d..9a7d75b2e 100644 --- a/libraries/trainers/optimization/include/SquaredHingeLoss.h +++ b/libraries/trainers/optimization/include/SquaredHingeLoss.h @@ -68,4 +68,80 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/SquaredHingeLoss.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template <typename OutputType> + bool SquaredHingeLoss::VerifyOutput(OutputType output) + { + if (output == 1.0 || output == -1.0) + { + return true; + } + return false; + } + + template <typename OutputType> + double SquaredHingeLoss::Value(double prediction, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Squared Hinge Loss requires binary outputs"); + + double margin = prediction * output; + + if (margin >= 1.0) + { + return 0.0; + } + double hinge = 1.0 - margin; + return 0.5 * hinge * hinge; + } + + template <typename OutputType> + double SquaredHingeLoss::Derivative(double prediction, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Squared Hinge Loss requires binary outputs"); + + double margin = prediction * output; + + if (margin >= 1.0) + { + return 0.0; + } + return -output * (1.0 - margin); + } + + template <typename OutputType> + double SquaredHingeLoss::Conjugate(double v, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Squared Hinge Loss requires binary outputs"); + + double a = output * v; + + if (a <= 0) + { + return a + 0.5 * v * v; + } + return std::numeric_limits<double>::infinity(); + } + + template <typename OutputType> + double SquaredHingeLoss::ConjugateProx(double theta, double z, OutputType output) + { + DEBUG_CHECK(VerifyOutput(output), "Squared Hinge Loss requires binary outputs"); + + if (output * z <= theta) + { + return (z - theta * output) / (1 + theta); + } + return 0.0; + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/include/VectorSolution.h b/libraries/trainers/optimization/include/VectorSolution.h index 947d603d0..18ef84fc3 100644 --- a/libraries/trainers/optimization/include/VectorSolution.h +++ b/libraries/trainers/optimization/include/VectorSolution.h @@ -110,4
+110,179 @@ namespace trainers } // namespace trainers } // namespace ell -#include "../tcc/VectorSolution.tcc" +#pragma region implementation + +namespace ell +{ +namespace trainers +{ + namespace optimization + { + template + void VectorSolution::Resize(const InputType& inputExample, OutputType) + { + _weights.Resize(inputExample.Size()); + + if constexpr (!isDouble) + { + _doubleInput.Resize(inputExample.Size()); + } + } + + template + void VectorSolution::operator=(const VectorSolution& other) + { + _weights.CopyFrom(other._weights); + + if constexpr (isBiased) + { + _bias = other._bias; + } + } + + template + void VectorSolution::operator=(SumExpression>, ScaledExpression>> expression) + { + const auto& thisTerm = expression.lhs; + const auto& otherTerm = expression.rhs; + + if (&(thisTerm.lhs.get()) != this) + { + throw OptimizationException("First term should be a scaled version of this solution"); + } + + double thisScale = thisTerm.rhs; + const auto& otherSolution = otherTerm.lhs.get(); + double otherScale = otherTerm.rhs; + math::ScaleAddUpdate(otherScale, otherSolution.GetVector(), thisScale, _weights); + + if constexpr (isBiased) + { + _bias = thisScale * _bias + otherScale * otherSolution.GetBias(); + } + } + + template + void VectorSolution::operator=(SumExpression>, ScaledColumnVectorExpression> expression) + { + const auto& thisTerm = expression.lhs; + const auto& updateTerm = expression.rhs; + + if (&(thisTerm.lhs.get()) != this) + { + throw OptimizationException("One of the terms should be a scaled version of this solution"); + } + + double thisScale = thisTerm.rhs; + auto updateVector = updateTerm.lhs; + double updateScale = updateTerm.rhs; + + if constexpr (isDouble) + { + math::ScaleAddUpdate(updateScale, updateVector, thisScale, _weights); + } + else + { + auto doubleColumnVector = _doubleInput.Transpose(); + doubleColumnVector.CopyFrom(updateVector); + math::ScaleAddUpdate(updateScale, doubleColumnVector, thisScale, _weights); + } + + if constexpr (isBiased) + { + _bias = thisScale * _bias + updateScale; + } + } + + template + void VectorSolution::operator-=(const VectorSolution& other) + { + _weights -= other._weights; + if constexpr (isBiased) + { + _bias -= other._bias; + } + } + + template + void VectorSolution::operator+=(ScaledColumnVectorExpression expression) + { + const auto& updateVector = expression.lhs; + double updateScale = expression.rhs; + + if constexpr (isDouble) + { + math::ScaleAddUpdate(updateScale, updateVector, 1.0, _weights); + } + else + { + auto doubleColumnVector = _doubleInput.Transpose(); + doubleColumnVector.CopyFrom(updateVector); + math::ScaleAddUpdate(updateScale, doubleColumnVector, 1.0, _weights); + } + + if constexpr (isBiased) + { + _bias += updateScale; + } + } + + template + double VectorSolution::Multiply(const InputType& input) const + { + double result; + + if constexpr (isDouble) + { + result = math::Dot(input, _weights); + } + else + { + _doubleInput.CopyFrom(input); + result = math::Dot(_doubleInput, _weights); + } + + if constexpr (isBiased) + { + result += _bias; + } + + return result; + } + + template + double VectorSolution::GetNorm2SquaredOf(const InputType& input) + { + double result = input.Norm2Squared(); + + if constexpr (isBiased) + { + result += 1.0; + } + + return result; + } + + template + double Norm2Squared(const VectorSolution& solution) + { + double result = solution.GetVector().Norm2Squared(); + + if constexpr (isBiased) + { + result += solution.GetBias() * solution.GetBias(); + } + + return result; + } + 
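VectorSolution above branches on its isBiased and isDouble template flags with if constexpr, so the unbiased or double-precision instantiations compile the extra branches away entirely instead of testing them at run time. A reduced sketch of the pattern:

#include <cstdio>

template <bool isBiased>
struct TinySolution // stand-in for the solution types above
{
    double weight = 0.5;
    double bias = 0.25;

    double Multiply(double x) const
    {
        double result = weight * x;
        if constexpr (isBiased)
        {
            result += bias; // this line does not exist in TinySolution<false>
        }
        return result;
    }
};

int main()
{
    TinySolution<true> biased;
    TinySolution<false> unbiased;
    std::printf("%.2f vs %.2f\n", biased.Multiply(2.0), unbiased.Multiply(2.0));
}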
+ template + double operator*(math::ConstRowVectorReference input, const VectorSolution& solution) + { + return solution.Multiply(input); + } + } // namespace optimization +} // namespace trainers +} // namespace ell + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/tcc/AbsoluteLoss.tcc b/libraries/trainers/optimization/tcc/AbsoluteLoss.tcc deleted file mode 100644 index 7368e8061..000000000 --- a/libraries/trainers/optimization/tcc/AbsoluteLoss.tcc +++ /dev/null @@ -1,64 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: AbsoluteLoss.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - double AbsoluteLoss::Value(double prediction, OutputType output) - { - return std::abs(prediction - output); - } - - template - double AbsoluteLoss::Derivative(double prediction, OutputType output) - { - if (prediction == output) - { - return 0.0; - } - if (prediction < output) - { - return -1.0; - } - return 1.0; - } - - template - double AbsoluteLoss::Conjugate(double v, OutputType output) - { - if (-1.0 <= v && v <= 1.0) - { - return output * v; - } - return std::numeric_limits::infinity(); - } - - template - double AbsoluteLoss::ConjugateProx(double theta, double z, OutputType output) - { - double a = z - theta * output; - - if (a <= -1.0) - { - return -1.0; - } - if (a >= 1.0) - { - return 1.0; - } - return a; - } - } // namespace optimization -} // namespace trainers -} // namespace ell \ No newline at end of file diff --git a/libraries/trainers/optimization/tcc/ElasticNetRegularizer.tcc b/libraries/trainers/optimization/tcc/ElasticNetRegularizer.tcc deleted file mode 100644 index 1bc982805..000000000 --- a/libraries/trainers/optimization/tcc/ElasticNetRegularizer.tcc +++ /dev/null @@ -1,40 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: ElasticNetRegularizer.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - double ElasticNetRegularizer::Value(const SolutionType& w) const - { - return 0.5 * Norm2Squared(w) + _beta * w.GetVector().Norm1(); // note: Norm1 does not include the bias term - } - - template - double ElasticNetRegularizer::Conjugate(const SolutionType& v) const - { - SolutionType w = v; - L1Prox(w.GetVector(), _beta); // note: L1 term does not apply to the bias term - double result = -_beta * w.GetVector().Norm1(); - w -= v; - result += 0.5 * (Norm2Squared(v) - Norm2Squared(w)); - return result; - } - - template - void ElasticNetRegularizer::ConjugateGradient(const SolutionType& v, SolutionType& w) const - { - w = v; - L1Prox(w.GetVector(), _beta); // note: L1Prox does not apply to the bias term - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/Expression.tcc b/libraries/trainers/optimization/tcc/Expression.tcc deleted file mode 100644 index 7d2c83442..000000000 --- a/libraries/trainers/optimization/tcc/Expression.tcc +++ /dev/null @@ -1,47 +0,0 @@ 
-//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Expression.tcc (optimization) -// Authors: Chuck Jacobs, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - Expression MakeExpression(const LeftType& lhs, const RightType& rhs) - { - return Expression(lhs, rhs); - } - - template - ScaledColumnVectorExpression operator*(math::ConstColumnVectorReference vectorReference, double scalar) - { - return MakeExpression(vectorReference, scalar); - } - - template - OuterProductExpression operator*(math::ConstColumnVectorReference columnVectorReference, math::ConstRowVectorReference rowVectorReference) - { - return MakeExpression(columnVectorReference, rowVectorReference); - } - - template Concept> - ScaledExpression operator*(const T& scalable, double scalar) - { - return MakeExpression(std::ref(scalable), scalar); - } - - template Concept1, IsSummable Concept2> - SumExpression operator+(T1 summable1, T2 summable2) - { - return MakeExpression(summable1, summable2); - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/GoldenSectionMinimizer.tcc b/libraries/trainers/optimization/tcc/GoldenSectionMinimizer.tcc deleted file mode 100644 index 8c9f25c03..000000000 --- a/libraries/trainers/optimization/tcc/GoldenSectionMinimizer.tcc +++ /dev/null @@ -1,102 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: GoldenSectionSearch.tcc (optimization) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#pragma once - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - GoldenSectionMinimizer::GoldenSectionMinimizer(FunctionType function, double lower, double upper) : - _boundary1(lower), - _boundary2(upper), - _function(std::move(function)) - { - _minPoint = _goldenComplement * _boundary1 + _golden * _boundary2; - _minPointValue = _function(_minPoint); - _boundary1Value = _function(_boundary1); - _boundary2Value = _function(_boundary2); - } - - template - void GoldenSectionMinimizer::Step(size_t iterations) - { - for (size_t i = 0; i < iterations; ++i) - { - Step(); - } - } - template - void GoldenSectionMinimizer::MinimizeToPrecision(double precision) - { - do - { - Step(); - } while (GetPrecision() > precision); - } - template - void GoldenSectionMinimizer::Step() - { - double newPoint = _goldenComplement * _boundary1 + _golden * _minPoint; - double newPointValue = _function(newPoint); - if (newPointValue < _minPointValue) - { - _boundary2 = _minPoint; - _boundary2Value = _minPointValue; - _minPoint = newPoint; - _minPointValue = newPointValue; - } - else - { - _boundary1 = _boundary2; - _boundary1Value = _boundary2Value; - _boundary2 = newPoint; - _boundary2Value = newPointValue; - } - } - template - double GoldenSectionMinimizer::GetArgMinLowerBound() const - { - return std::min(_boundary1, _boundary2); - } - template - double GoldenSectionMinimizer::GetArgMinUpperBound() const - { - return std::max(_boundary1, _boundary2); - } - template - double GoldenSectionMinimizer::GetApproximateArgMin() const - { - return 0.5 * (_boundary1 + 
_boundary2); - } - - template - double GoldenSectionMinimizer::GetMinUpperBound() const - { - return _minPointValue; - } - - template - double GoldenSectionMinimizer::GetMinLowerBound() const - { - double min1 = _boundary1Value * (1.0 - 1.0 / _golden) + _minPointValue / _golden; - double min2 = _boundary2Value * (1.0 - 1.0 / _goldenComplement) + _minPointValue / _goldenComplement; - return std::min(min1, min2); - } - - template - double GoldenSectionMinimizer::GetPrecision() const - { - return GetMinUpperBound() - GetMinLowerBound(); - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/HingeLoss.tcc b/libraries/trainers/optimization/tcc/HingeLoss.tcc deleted file mode 100644 index 142365a68..000000000 --- a/libraries/trainers/optimization/tcc/HingeLoss.tcc +++ /dev/null @@ -1,86 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: HingeLoss.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - bool HingeLoss::VerifyOutput(OutputType output) - { - if (output == 1.0 || output == -1.0) - { - return true; - } - return false; - } - - template - double HingeLoss::Value(double prediction, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Hinge Loss requires binary outputs"); - - double margin = prediction * output; - - if (margin >= 1.0) - { - return 0.0; - } - return 1.0 - margin; - } - - template - double HingeLoss::Derivative(double prediction, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Hinge Loss requires binary outputs"); - - double margin = prediction * output; - - if (margin >= 1.0) - { - return 0.0; - } - return -output; - } - - template - double HingeLoss::Conjugate(double v, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Hinge Loss requires binary outputs"); - - double a = output * v; - - if (-1.0 <= a && a <= 0.0) - { - return a; - } - return std::numeric_limits::infinity(); - } - - template - double HingeLoss::ConjugateProx(double theta, double z, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Hinge Loss requires binary outputs"); - - double a = output * z; - - if (a < theta - 1.0) - { - return -output; - } - if (a <= theta) - { - return z - theta * output; - } - return 0.0; - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/HuberLoss.tcc b/libraries/trainers/optimization/tcc/HuberLoss.tcc deleted file mode 100644 index 048e3c5e9..000000000 --- a/libraries/trainers/optimization/tcc/HuberLoss.tcc +++ /dev/null @@ -1,70 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: HuberLoss.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - double HuberLoss::Value(double prediction, OutputType output) const - { - double residual = prediction - output; - - if (residual >= -_gamma && residual <= _gamma) - { - return 0.5 / _gamma * residual * residual; - } - return std::abs(residual) - 0.5 * 
_gamma; - } - - template - double HuberLoss::Derivative(double prediction, OutputType output) const - { - double residual = prediction - output; - - if (residual >= -_gamma && residual <= _gamma) - { - return residual / _gamma; - } - if (residual > 0) - { - return 1.0; - } - return -1.0; - } - - template - double HuberLoss::Conjugate(double v, OutputType output) const - { - if (-1.0 <= v && v <= 1.0) - { - return output * v + 0.5 * _gamma * v * v; - } - return std::numeric_limits::infinity(); - } - - template - double HuberLoss::ConjugateProx(double theta, double z, OutputType output) const - { - double a = (z - theta * output) / (1 + theta * _gamma); - - if (a <= -1.0) - { - return -1.0; - } - if (a >= 1.0) - { - return 1.0; - } - return a; - } - } // namespace optimization -} // namespace trainers -} // namespace ell \ No newline at end of file diff --git a/libraries/trainers/optimization/tcc/IndexedContainer.tcc b/libraries/trainers/optimization/tcc/IndexedContainer.tcc deleted file mode 100644 index 8a90c8816..000000000 --- a/libraries/trainers/optimization/tcc/IndexedContainer.tcc +++ /dev/null @@ -1,23 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: IndexedContainer.tcc (optimization) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - IndexedContainerElementType VectorIndexedContainer::Get(size_t index) const - { - return IndexedContainerElementType((*this)[index]); - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/L2Regularizer.tcc b/libraries/trainers/optimization/tcc/L2Regularizer.tcc deleted file mode 100644 index 31037c437..000000000 --- a/libraries/trainers/optimization/tcc/L2Regularizer.tcc +++ /dev/null @@ -1,34 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: L2Regularizer.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - double L2Regularizer::Value(const SolutionType& w) - { - return 0.5 * Norm2Squared(w); - } - - template - double L2Regularizer::Conjugate(const SolutionType& v) - { - return 0.5 * Norm2Squared(v); - } - - template - void L2Regularizer::ConjugateGradient(const SolutionType& v, SolutionType& w) - { - w = v; - } - } // namespace optimization -} // namespace trainers -} // namespace ell \ No newline at end of file diff --git a/libraries/trainers/optimization/tcc/LogisticLoss.tcc b/libraries/trainers/optimization/tcc/LogisticLoss.tcc deleted file mode 100644 index 3e07b6c68..000000000 --- a/libraries/trainers/optimization/tcc/LogisticLoss.tcc +++ /dev/null @@ -1,102 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: LogisticLoss.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - 
bool LogisticLoss::VerifyOutput(OutputType output) - { - if (output == 1.0 || output == -1.0) - { - return true; - } - return false; - } - - template - double LogisticLoss::Value(double prediction, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Logistic Loss requires binary outputs"); - - const double exponentLimit = 18.0; - - double margin = prediction * output; - - if (margin <= -exponentLimit) - { - return -margin; - } - return std::log1p(std::exp(-margin)); - } - - template - double LogisticLoss::Derivative(double prediction, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Logistic Loss requires binary outputs"); - - double margin = static_cast(prediction * output); - - if (margin <= 0.0) - { - return -output / (1 + std::exp(margin)); - } - auto expNegMargin = std::exp(-margin); - return -output * expNegMargin / (1 + expNegMargin); - } - - template - double LogisticLoss::Conjugate(double v, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Logistic Loss requires binary outputs"); - - const double conjugateBoundary = 1.0e-12; - double a = output * v; - - if (a < -1.0 || a > 0.0) - { - return std::numeric_limits::infinity(); - } - if (a <= conjugateBoundary - 1.0 || -conjugateBoundary <= a) - { - return 0.0; - } - return (1.0 + a) * std::log1p(a) + (-a) * std::log(-a); - } - - template - double LogisticLoss::ConjugateProx(double theta, double z, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Logistic Loss requires binary outputs"); - - const double conjugateBoundary = 1.0e-12; - const double conjugateProxDesiredDualityGap = 1.0e-6; - const size_t conjugateProxMaxIterations = 20; - - double lowerBound = conjugateBoundary - 1.0; - double upperBound = -conjugateBoundary; - - double a = output * z; - double b = std::min(upperBound, std::max(lowerBound, a)); - double f = 0, df = 0; - for (size_t k = 0; k < conjugateProxMaxIterations; ++k) - { - f = b - a + theta * log((1.0 + b) / (-b)); - if (std::abs(f) <= conjugateProxDesiredDualityGap) break; - df = 1.0 - theta / (b * (1.0 + b)); - b -= f / df; - b = std::min(upperBound, std::max(lowerBound, b)); - } - return b * output; - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/MatrixExampleSet.tcc b/libraries/trainers/optimization/tcc/MatrixExampleSet.tcc deleted file mode 100644 index f3b3498b5..000000000 --- a/libraries/trainers/optimization/tcc/MatrixExampleSet.tcc +++ /dev/null @@ -1,33 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MatrixExampleSet.tcc (optimization) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - MatrixExampleSet::MatrixExampleSet(math::RowMatrix input, math::RowMatrix output) : - _input(std::move(input)), - _output(std::move(output)) - { - if (_input.NumRows() != _output.NumRows()) - { - throw OptimizationException("Number of inputs and outputs don't match"); - } - } - - template - auto MatrixExampleSet::Get(size_t index) const -> ExampleType - { - return ExampleType(_input.GetRow(index), _output.GetRow(index)); - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/MatrixSolution.tcc b/libraries/trainers/optimization/tcc/MatrixSolution.tcc 
deleted file mode 100644 index ad1bcab0f..000000000 --- a/libraries/trainers/optimization/tcc/MatrixSolution.tcc +++ /dev/null @@ -1,194 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MatrixSolution.tcc (optimization) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - void MatrixSolution::Resize(const InputType& inputExample, const OutputType& outputExample) - { - math::ColumnMatrix matrix(inputExample.Size(), outputExample.Size()); - _weights.Swap(matrix); - - if constexpr (!isDouble) - { - _doubleInput.Resize(inputExample.Size()); - } - - if constexpr (isBiased) - { - _bias.Resize(outputExample.Size()); - } - } - - template - void MatrixSolution::operator=(const MatrixSolution& other) - { - _weights.CopyFrom(other._weights); - - if constexpr (isBiased) - { - _bias.CopyFrom(other._bias); - } - } - - template - void MatrixSolution::operator=(SumExpression>, ScaledExpression>> expression) - { - const auto& thisTerm = expression.lhs; - const auto& otherTerm = expression.rhs; - - if (&(thisTerm.lhs.get()) != this) - { - throw OptimizationException("First term should be a scaled version of this solution"); - } - - double thisScale = thisTerm.rhs; - const auto& otherSolution = otherTerm.lhs.get(); - double otherScale = otherTerm.rhs; - math::ScaleAddUpdate(otherScale, otherSolution._weights, thisScale, _weights); - - if constexpr (isBiased) - { - math::ScaleAddUpdate(otherScale, otherSolution.GetBias(), thisScale, _bias); - } - } - - template - void MatrixSolution::operator=(SumExpression>, OuterProductExpression> expression) - { - const auto& thisTerm = expression.lhs; - const auto& updateTerm = expression.rhs; - - if (&(thisTerm.lhs.get()) != this) - { - throw OptimizationException("The first term should be a scaled version of this solution"); - } - - double thisScale = thisTerm.rhs; - const auto& columnVectorReference = updateTerm.lhs; - const auto& rowVectorReference = updateTerm.rhs; - _weights *= thisScale; - - if constexpr (isDouble) - { - math::RankOneUpdate(1.0, columnVectorReference, rowVectorReference, _weights); - } - else - { - auto doubleColumnVector = _doubleInput.Transpose(); - doubleColumnVector.CopyFrom(columnVectorReference); - math::RankOneUpdate(1.0, doubleColumnVector, rowVectorReference, _weights); - } - - if constexpr (isBiased) - { - math::ScaleAddUpdate(1.0, rowVectorReference, thisScale, _bias); - } - } - - template - void MatrixSolution::operator-=(const MatrixSolution& other) - { - _weights -= other._weights; - if constexpr (isBiased) - { - _bias -= other._bias; - } - } - - template - void MatrixSolution::operator+=(OuterProductExpression expression) - { - const auto& columnVectorReference = expression.lhs; - const auto& rowVectorReference = expression.rhs; - - if constexpr (isDouble) - { - math::RankOneUpdate(1.0, columnVectorReference, rowVectorReference, _weights); - } - else - { - auto doubleColumnVector = _doubleInput.Transpose(); - doubleColumnVector.CopyFrom(columnVectorReference); - math::RankOneUpdate(1.0, doubleColumnVector, rowVectorReference, _weights); - } - - if constexpr (isBiased) - { - math::ScaleAddUpdate(1.0, rowVectorReference, 1.0, _bias); - } - } - - template - math::RowVector MatrixSolution::Multiply(const InputType& input) const - { - math::RowVector 
result(_weights.NumColumns()); - - if constexpr (isBiased) - { - result.CopyFrom(_bias); - } - - if constexpr (isDouble) - { - math::MultiplyScaleAddUpdate(1.0, input, _weights, 1.0, result); - } - else - { - _doubleInput.CopyFrom(input); - math::MultiplyScaleAddUpdate(1.0, _doubleInput, _weights, 1.0, result); - } - - return result; - } - - template - double MatrixSolution::GetNorm2SquaredOf(const InputType& input) - { - double result = input.Norm2Squared(); - - if constexpr (isBiased) - { - result += 1.0; - } - - return result; - } - - template - void MatrixSolution::InitializeAuxiliaryVariable(AuxiliaryDoubleType& aux) - { - aux.Resize(_weights.NumColumns()); - aux.Reset(); - } - - template - double Norm2Squared(const MatrixSolution& solution) - { - double result = solution.GetMatrix().ReferenceAsVector().Norm2Squared(); - - if constexpr (isBiased) - { - result += solution.GetBias().Norm2Squared(); - } - - return result; - } - - template - math::RowVector operator*(math::ConstRowVectorReference input, const MatrixSolution& solution) - { - return solution.Multiply(input); - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/MaxRegularizer.tcc b/libraries/trainers/optimization/tcc/MaxRegularizer.tcc deleted file mode 100644 index 3fbeecb1f..000000000 --- a/libraries/trainers/optimization/tcc/MaxRegularizer.tcc +++ /dev/null @@ -1,40 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MaxRegularizer.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - double MaxRegularizer::Value(const SolutionType& w) const - { - return 0.5 * Norm2Squared(w) + _beta * w.GetVector().NormInfinity(); // note: NormInfinity does not include the bias term - } - - template - double MaxRegularizer::Conjugate(const SolutionType& v) const - { - SolutionType w = v; - LInfinityProx(w.GetVector(), _scratch, _beta); // note: LInfinity term does not apply to the bias term - double result = -_beta * w.GetVector().NormInfinity(); - w -= v; - result += 0.5 * (Norm2Squared(v) - Norm2Squared(w)); - return result; - } - - template - void MaxRegularizer::ConjugateGradient(const SolutionType& v, SolutionType& w) const - { - w = v; - LInfinityProx(w.GetVector(), _scratch, _beta); // note: LInfinityProx does not apply to the bias term - } - } // namespace optimization -} // namespace trainers -} // namespace ell \ No newline at end of file diff --git a/libraries/trainers/optimization/tcc/MultivariateLoss.tcc b/libraries/trainers/optimization/tcc/MultivariateLoss.tcc deleted file mode 100644 index d357a6720..000000000 --- a/libraries/trainers/optimization/tcc/MultivariateLoss.tcc +++ /dev/null @@ -1,83 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: MultivariateLoss.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - MultivariateLoss::MultivariateLoss(LossType univariateLoss) : - _univariateLoss(std::move(univariateLoss)) - {} - - template - 
template - bool MultivariateLoss::VerifyOutput(math::ConstRowVectorReference output) const - { - for (size_t i = 0; i < output.Size(); ++i) - { - if (!_univariateLoss.VerifyOutput(output[i])) - { - return false; - } - } - return true; - } - - template - template - double MultivariateLoss::Value(math::ConstRowVectorReference prediction, math::ConstRowVectorReference output) const - { - double result = 0; - for (size_t i = 0; i < prediction.Size(); ++i) - { - result += _univariateLoss.Value(prediction[i], output[i]); - } - return result; - } - - template - template - math::RowVector MultivariateLoss::Derivative(math::ConstRowVectorReference prediction, math::ConstRowVectorReference output) const - { - math::RowVector result(prediction.Size()); - for (size_t i = 0; i < prediction.Size(); ++i) - { - result[i] = _univariateLoss.Derivative(prediction[i], output[i]); - } - return result; - } - - template - template - double MultivariateLoss::Conjugate(math::ConstRowVectorReference dual, math::ConstRowVectorReference output) const - { - double result = 0; - for (size_t i = 0; i < dual.Size(); ++i) - { - result += _univariateLoss.Conjugate(dual[i], output[i]); - } - return result; - } - - template - template - math::RowVector MultivariateLoss::ConjugateProx(double sigma, math::ConstRowVectorReference prediction, math::ConstRowVectorReference output) const - { - math::RowVector result(prediction.Size()); - for (size_t i = 0; i < prediction.Size(); ++i) - { - result[i] = _univariateLoss.ConjugateProx(sigma, prediction[i], output[i]); - } - return result; - } - } // namespace optimization -} // namespace trainers -} // namespace ell \ No newline at end of file diff --git a/libraries/trainers/optimization/tcc/OptimizationExample.tcc b/libraries/trainers/optimization/tcc/OptimizationExample.tcc deleted file mode 100644 index 3c2dbda34..000000000 --- a/libraries/trainers/optimization/tcc/OptimizationExample.tcc +++ /dev/null @@ -1,34 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Example.tcc (optimization) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - Example::Example(InputType input, OutputType output, double weight) : - input(std::move(input)), - output(std::move(output)), - weight(weight) - { - } - - template - template - Example::Example(const Example& other) : - input(other.input), - output(other.output), - weight(other.weight) - { - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/SDCAOptimizer.tcc b/libraries/trainers/optimization/tcc/SDCAOptimizer.tcc deleted file mode 100644 index 9a62ff963..000000000 --- a/libraries/trainers/optimization/tcc/SDCAOptimizer.tcc +++ /dev/null @@ -1,155 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SDCAOptimizer.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - SDCAOptimizer::SDCAOptimizer(std::shared_ptr examples, LossFunctionType lossFunction, RegularizerType regularizer, 
SDCAOptimizerParameters parameters) : - _examples(examples), - _lossFunction(lossFunction), - _regularizer(regularizer) - { - if (!examples || examples->Size() == 0) - { - throw OptimizationException("Empty dataset"); - } - - // set parameters - _lambda = parameters.regularization; - _desiredDualityGap = parameters.desiredDualityGap; - _permuteData = parameters.permuteData; - - size_t numExamples = examples->Size(); - _normalizedInverseLambda = 1.0 / (numExamples * parameters.regularization); - - // set up random engine - std::seed_seq seed(parameters.randomSeedString.begin(), parameters.randomSeedString.end()); - _randomEngine.seed(seed); - - // resize data structures according to examples - auto firstExample = examples->Get(0); - _w.Resize(firstExample.input, firstExample.output); - _v.Resize(firstExample.input, firstExample.output); - _exampleInfo.resize(numExamples); - - // initialize the per-example info, check that outputs are compatible with the loss, and compute primal objective - double primalSum = 0; - for (size_t i = 0; i < numExamples; ++i) - { - auto example = examples->Get(i); - - if (!_lossFunction.VerifyOutput(example.output)) - { - throw OptimizationException("Discovered an output that is incompatible with the chosen loss function"); - } - - // cache the norm of the example - double norm2Squared = _w.GetNorm2SquaredOf(example.input); - _exampleInfo[i].norm2Squared = norm2Squared; - - // initialize the dual - _w.InitializeAuxiliaryVariable(_exampleInfo[i].dual); - - // compute the primal objective - auto prediction = example.input * _w; - primalSum += _lossFunction.Value(prediction, example.output); - } - - _solutionInfo.primalObjective = primalSum / numExamples + _lambda * _regularizer.Value(_w); - } - - template - void SDCAOptimizer::PerformEpochs(size_t count) - { - std::vector permutation(_examples->Size()); - std::iota(permutation.begin(), permutation.end(), 0); - - // epochs - for (size_t e = 0; e < count; ++e) - { - // early exit - if (_solutionInfo.DualityGap() <= _desiredDualityGap) - { - break; - } - - // generate random permutation - if (_permuteData) - { - std::shuffle(permutation.begin(), permutation.end(), _randomEngine); - } - - // process each example - for (size_t index : permutation) - { - Step(_examples->Get(index), _exampleInfo[index]); - } - - _solutionInfo.numEpochsPerformed++; - ComputeObjectives(); - } - } - - template - void SDCAOptimizer::Step(ExampleType example, ExampleInfo& exampleInfo) - { - const double tolerance = 1.0e-8; - - auto& dual = exampleInfo.dual; - - auto lipschitz = exampleInfo.norm2Squared * _normalizedInverseLambda; - if (lipschitz < tolerance) - { - return; - } - - auto prediction = example.input * _w; - prediction /= lipschitz; - prediction += dual; - - auto newDual = _lossFunction.ConjugateProx(1.0 / lipschitz, prediction, example.output); - dual -= newDual; - dual *= _normalizedInverseLambda; - - _v += Transpose(example.input) * dual; - _regularizer.ConjugateGradient(_v, _w); - exampleInfo.dual = newDual; - } - - template - void SDCAOptimizer::ComputeObjectives() - { - double primalSum = 0; - double dualSum = 0; - - for (size_t i = 0; i < _examples->Size(); ++i) - { - auto example = _examples->Get(i); - - auto prediction = example.input * _w; - primalSum += _lossFunction.Value(prediction, example.output); - - dualSum += _lossFunction.Conjugate(_exampleInfo[i].dual, example.output); - } - - _solutionInfo.primalObjective = primalSum / _examples->Size() + _lambda * _regularizer.Value(_w); - _solutionInfo.dualObjective = -dualSum 
/ _examples->Size() - _lambda * _regularizer.Conjugate(_v); - } - - template - SDCAOptimizer MakeSDCAOptimizer(std::shared_ptr examples, LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters) - { - return SDCAOptimizer(examples, lossFunction, regularizer, parameters); - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/SGDOptimizer.tcc b/libraries/trainers/optimization/tcc/SGDOptimizer.tcc deleted file mode 100644 index 838b19304..000000000 --- a/libraries/trainers/optimization/tcc/SGDOptimizer.tcc +++ /dev/null @@ -1,101 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SGDOptimizer.tcc (optimization) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - /// - template - SGDOptimizer::SGDOptimizer(std::shared_ptr examples, LossFunctionType lossFunction, SGDOptimizerParameters parameters) : - _examples(examples), - _lossFunction(std::move(lossFunction)), - _lambda(parameters.regularization) - { - if (!examples || examples->Size() == 0) - { - throw OptimizationException("Empty dataset"); - } - - // check that all the outputs are compatible with the loss - for (size_t i = 0; i < examples->Size(); ++i) - { - auto example = examples->Get(i); - - if (!_lossFunction.VerifyOutput(example.output)) - { - throw OptimizationException("Discovered an output that is incompatible with the chosen loss function"); - } - } - - // setup random engine - std::seed_seq seed(parameters.randomSeedString.begin(), parameters.randomSeedString.end()); - _randomEngine.seed(seed); - - auto example = examples->Get(0); - _lastW.Resize(example.input, example.output); - _averagedW.Resize(example.input, example.output); - } - - template - void SGDOptimizer::PerformEpochs(size_t count) - { - if (_examples == nullptr) - { - throw OptimizationException("Call SetExamples before calling Epoch"); - } - - std::vector permutation(_examples->Size()); - std::iota(permutation.begin(), permutation.end(), 0); - - // epochs - for (size_t e = 0; e < count; ++e) - { - // generate random permutation - std::shuffle(permutation.begin(), permutation.end(), _randomEngine); - - // process each example - for (size_t index : permutation) - { - Step(_examples->Get(index)); - } - } - } - - template - void SGDOptimizer::Step(ExampleType example) - { - const auto& x = example.input; - const auto& y = example.output; - double weight = example.weight; - - ++_t; - - // predict - auto p = x * _lastW; - - // calculate the loss derivative - auto derivative = _lossFunction.Derivative(p, y); - derivative *= -weight / (_lambda * _t); - - // update the solution - double inverseT = 1.0 / _t; - _lastW = _lastW * (1.0 - inverseT) + Transpose(x) * derivative; - _averagedW = _averagedW * (1.0 - inverseT) + _lastW * inverseT; - } - - template - SGDOptimizer MakeSGDOptimizer(std::shared_ptr examples, LossFunctionType lossFunction, SGDOptimizerParameters parameters) - { - return SGDOptimizer(examples, lossFunction, parameters); - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/SmoothedHingeLoss.tcc b/libraries/trainers/optimization/tcc/SmoothedHingeLoss.tcc deleted file mode 100644 index b5c6a7d71..000000000 --- 
a/libraries/trainers/optimization/tcc/SmoothedHingeLoss.tcc +++ /dev/null @@ -1,93 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SmoothedHingeLoss.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - bool SmoothedHingeLoss::VerifyOutput(OutputType output) - { - if (output == 1.0 || output == -1.0) - { - return true; - } - return false; - } - - template - double SmoothedHingeLoss::Value(double prediction, OutputType output) const - { - DEBUG_CHECK(VerifyOutput(output), "Smoothed Hinge Loss requires binary outputs"); - - double margin = prediction * output; - if (margin >= 1.0) - { - return 0.0; - } - if (margin >= 1.0 - _gamma) - { - double residual = (prediction - output); - return 0.5 / _gamma * residual * residual; - } - return 1.0 - margin - 0.5 * _gamma; - } - - template - double SmoothedHingeLoss::Derivative(double prediction, OutputType output) const - { - DEBUG_CHECK(VerifyOutput(output), "Smoothed Hinge Loss requires binary outputs"); - - double margin = prediction * output; - if (margin >= 1.0) - { - return 0.0; - } - if (margin >= 1.0 - _gamma) - { - return (prediction - output) / _gamma; - } - return -output; - } - - template - double SmoothedHingeLoss::Conjugate(double v, OutputType output) const - { - DEBUG_CHECK(VerifyOutput(output), "Smoothed Hinge Loss requires binary outputs"); - - double a = output * v; - - if (-1.0 <= a && a <= 0.0) - { - return a + 0.5 * _gamma * v * v; - } - return std::numeric_limits::infinity(); - } - - template - double SmoothedHingeLoss::ConjugateProx(double theta, double z, OutputType output) const - { - DEBUG_CHECK(VerifyOutput(output), "Smoothed Hinge Loss requires binary outputs"); - - double a = output * z; - - if (a < (1.0 - _gamma) * theta - 1.0) - { - return -output; - } - if (a <= theta) - { - return (z - theta * output) / (1 + theta * _gamma); - } - return 0.0; - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/SquareLoss.tcc b/libraries/trainers/optimization/tcc/SquareLoss.tcc deleted file mode 100644 index 60d2d9cd5..000000000 --- a/libraries/trainers/optimization/tcc/SquareLoss.tcc +++ /dev/null @@ -1,41 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SquareLoss.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - double SquareLoss::Value(double prediction, OutputType output) - { - double residual = prediction - output; - return 0.5 * residual * residual; - } - - template - double SquareLoss::Derivative(double prediction, OutputType output) - { - return prediction - output; - } - - template - double SquareLoss::Conjugate(double v, OutputType output) - { - return (0.5 * v + output) * v; - } - - template - double SquareLoss::ConjugateProx(double theta, double z, OutputType output) - { - return (z - theta * output) / (1 + theta); - } - } // namespace optimization -} // namespace trainers -} // namespace ell \ No newline at end of file diff --git 
a/libraries/trainers/optimization/tcc/SquaredHingeLoss.tcc b/libraries/trainers/optimization/tcc/SquaredHingeLoss.tcc deleted file mode 100644 index 51efd01c4..000000000 --- a/libraries/trainers/optimization/tcc/SquaredHingeLoss.tcc +++ /dev/null @@ -1,81 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: SquaredHingeLoss.tcc (optimization) -// Authors: Lin Xiao, Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - bool SquaredHingeLoss::VerifyOutput(OutputType output) - { - if (output == 1.0 || output == -1.0) - { - return true; - } - return false; - } - - template - double SquaredHingeLoss::Value(double prediction, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Squared Hinge Loss requires binary outputs"); - - double margin = prediction * output; - - if (margin >= 1.0) - { - return 0.0; - } - double hinge = 1.0 - margin; - return 0.5 * hinge * hinge; - } - - template - double SquaredHingeLoss::Derivative(double prediction, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Squared Hinge Loss requires binary outputs"); - - double margin = prediction * output; - - if (margin >= 1.0) - { - return 0.0; - } - return -output * (1.0 - margin); - } - - template - double SquaredHingeLoss::Conjugate(double v, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Squared Hinge Loss requires binary outputs"); - - double a = output * v; - - if (a <= 0) - { - return a + 0.5 * v * v; - } - return std::numeric_limits::infinity(); - } - - template - double SquaredHingeLoss::ConjugateProx(double theta, double z, OutputType output) - { - DEBUG_CHECK(VerifyOutput(output), "Squared Hinge Loss requires binary outputs"); - - if (output * z <= theta) - { - return (z - theta * output) / (1 + theta); - } - return 0.0; - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/tcc/VectorSolution.tcc b/libraries/trainers/optimization/tcc/VectorSolution.tcc deleted file mode 100644 index d40dcf8a2..000000000 --- a/libraries/trainers/optimization/tcc/VectorSolution.tcc +++ /dev/null @@ -1,180 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: VectorSolution.tcc (optimization) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace ell -{ -namespace trainers -{ - namespace optimization - { - template - void VectorSolution::Resize(const InputType& inputExample, OutputType) - { - _weights.Resize(inputExample.Size()); - - if constexpr (!isDouble) - { - _doubleInput.Resize(inputExample.Size()); - } - } - - template - void VectorSolution::operator=(const VectorSolution& other) - { - _weights.CopyFrom(other._weights); - - if constexpr (isBiased) - { - _bias = other._bias; - } - } - - template - void VectorSolution::operator=(SumExpression>, ScaledExpression>> expression) - { - const auto& thisTerm = expression.lhs; - const auto& otherTerm = expression.rhs; - - if (&(thisTerm.lhs.get()) != this) - { - throw OptimizationException("First term should be a scaled version of this solution"); - } - - double thisScale = thisTerm.rhs; - const auto& otherSolution = 
otherTerm.lhs.get(); - double otherScale = otherTerm.rhs; - math::ScaleAddUpdate(otherScale, otherSolution.GetVector(), thisScale, _weights); - - if constexpr (isBiased) - { - _bias = thisScale * _bias + otherScale * otherSolution.GetBias(); - } - } - - template - void VectorSolution::operator=(SumExpression>, ScaledColumnVectorExpression> expression) - { - const auto& thisTerm = expression.lhs; - const auto& updateTerm = expression.rhs; - - if (&(thisTerm.lhs.get()) != this) - { - throw OptimizationException("One of the terms should be a scaled version of this solution"); - } - - double thisScale = thisTerm.rhs; - auto updateVector = updateTerm.lhs; - double updateScale = updateTerm.rhs; - - if constexpr (isDouble) - { - math::ScaleAddUpdate(updateScale, updateVector, thisScale, _weights); - } - else - { - auto doubleColumnVector = _doubleInput.Transpose(); - doubleColumnVector.CopyFrom(updateVector); - math::ScaleAddUpdate(updateScale, doubleColumnVector, thisScale, _weights); - } - - if constexpr (isBiased) - { - _bias = thisScale * _bias + updateScale; - } - } - - template - void VectorSolution::operator-=(const VectorSolution& other) - { - _weights -= other._weights; - if constexpr (isBiased) - { - _bias -= other._bias; - } - } - - template - void VectorSolution::operator+=(ScaledColumnVectorExpression expression) - { - const auto& updateVector = expression.lhs; - double updateScale = expression.rhs; - - if constexpr (isDouble) - { - math::ScaleAddUpdate(updateScale, updateVector, 1.0, _weights); - } - else - { - auto doubleColumnVector = _doubleInput.Transpose(); - doubleColumnVector.CopyFrom(updateVector); - math::ScaleAddUpdate(updateScale, doubleColumnVector, 1.0, _weights); - } - - if constexpr (isBiased) - { - _bias += updateScale; - } - } - - template - double VectorSolution::Multiply(const InputType& input) const - { - double result; - - if constexpr (isDouble) - { - result = math::Dot(input, _weights); - } - else - { - _doubleInput.CopyFrom(input); - result = math::Dot(_doubleInput, _weights); - } - - if constexpr (isBiased) - { - result += _bias; - } - - return result; - } - - template - double VectorSolution::GetNorm2SquaredOf(const InputType& input) - { - double result = input.Norm2Squared(); - - if constexpr (isBiased) - { - result += 1.0; - } - - return result; - } - - template - double Norm2Squared(const VectorSolution& solution) - { - double result = solution.GetVector().Norm2Squared(); - - if constexpr (isBiased) - { - result += solution.GetBias() * solution.GetBias(); - } - - return result; - } - - template - double operator*(math::ConstRowVectorReference input, const VectorSolution& solution) - { - return solution.Multiply(input); - } - } // namespace optimization -} // namespace trainers -} // namespace ell diff --git a/libraries/trainers/optimization/test/include/LossFunction_test.h b/libraries/trainers/optimization/test/include/LossFunction_test.h index 502d4ea7c..0f2ec3df0 100644 --- a/libraries/trainers/optimization/test/include/LossFunction_test.h +++ b/libraries/trainers/optimization/test/include/LossFunction_test.h @@ -25,4 +25,134 @@ void TestConjugate(LossFunctionType loss, Range vRange, Range outputRange, Range template void TestConjugateProx(LossFunctionType loss, Range thetaRange, Range zRange, Range outputRange, Range comparatorRange); -#include "../tcc/LossFunction_test.tcc" \ No newline at end of file +#pragma region implementation + +#include + +#include + +#include +#include + +using namespace ell; +using namespace ell::trainers::optimization; + 
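TestConjugate and TestConjugateProx below validate each loss numerically by minimizing a convex objective with GoldenSectionMinimizer, whose implementation this patch moves out of GoldenSectionMinimizer.tcc. A minimal sketch of that pattern, using only the members visible in the deleted file above (the decltype template argument is an assumption):

    auto objective = [](double x) { return (x - 2.0) * (x - 2.0); }; // convex, argmin at x = 2
    GoldenSectionMinimizer<decltype(objective)> minimizer(objective, 0.0, 5.0);
    minimizer.MinimizeToPrecision(1.0e-6);
    double argMin = minimizer.GetApproximateArgMin(); // close to 2.0
    double gap = minimizer.GetPrecision();            // min upper bound minus lower bound, <= 1e-6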
+template +double TestDerivative(LossFunctionType loss, double prediction, double output) +{ + const double epsilon = 1.0e-6; + double lossPlus = loss.Value(prediction + epsilon, output); + double lossMinus = loss.Value(prediction - epsilon, output); + double difference = lossPlus - lossMinus; + double limit = difference / (2 * epsilon); + double derivative = loss.Derivative(prediction, output); + double error = std::abs(derivative - limit); + + return error; +} + +template +void TestDerivative(LossFunctionType loss, Range predictionRange, Range outputRange) +{ + double errorTolerance = 1.0e-6; + double maxError = 0; + + for (double prediction = predictionRange.from; prediction <= predictionRange.to; prediction += predictionRange.increment) + { + for (double output = outputRange.from; output <= outputRange.to; output += outputRange.increment) + { + maxError = std::max(maxError, TestDerivative(loss, prediction, output)); + } + } + + std::string lossName = typeid(LossFunctionType).name(); + lossName = lossName.substr(lossName.find_last_of(":") + 1); + + testing::ProcessTest("TestDerivative <" + lossName + ">", maxError < errorTolerance); +} + +template +bool TestConjugate(LossFunctionType loss, double v, double output, double lower, double upper) +{ + const double tolerance = 1.0e-6; + + double conjugate = loss.Conjugate(v, output); + if (std::isinf(conjugate)) + { + return true; + } + + auto objective = [&](double x) { return conjugate - x * v + loss.Value(x, output); }; + auto minimizer = GoldenSectionMinimizer(objective, lower, upper); + minimizer.MinimizeToPrecision(tolerance); + if (minimizer.GetMinUpperBound() < tolerance && minimizer.GetMinLowerBound() > -tolerance) + { + return true; + } + return false; +} + +template +void TestConjugate(LossFunctionType loss, Range vRange, Range outputRange, double lower, double upper) +{ + bool success = true; + for (double v = vRange.from; v <= vRange.to; v += vRange.increment) + { + for (double output = outputRange.from; output <= outputRange.to; output += outputRange.increment) + { + if (!TestConjugate(loss, v, output, lower, upper)) + { + success = false; + } + } + } + + std::string lossName = typeid(LossFunctionType).name(); + lossName = lossName.substr(lossName.find_last_of(":") + 1); + + testing::ProcessTest("TestConjugate <" + lossName + ">", success); +} + +template +bool TestConjugateProx(LossFunctionType loss, double theta, double z, double output, double lower, double upper) +{ + const double tolerance = 1.0e-6; + + double conjugateProx = loss.ConjugateProx(theta, z, output); + double conjugateProxValue = theta * loss.Conjugate(conjugateProx, output) + 0.5 * (conjugateProx - z) * (conjugateProx - z); + auto objective = [&](double x) { return theta * loss.Conjugate(x, output) + 0.5 * (x - z) * (x - z) - conjugateProxValue; }; + + auto minimizer = GoldenSectionMinimizer(objective, lower, upper); + minimizer.MinimizeToPrecision(tolerance); + if (minimizer.GetMinUpperBound() < tolerance && minimizer.GetMinLowerBound() > -tolerance) + { + return true; + } + return false; +} + +template +void TestConjugateProx(LossFunctionType loss, Range thetaRange, Range zRange, Range outputRange, double lower, double upper) +{ + bool success = true; + for (double z = zRange.from; z <= zRange.to; z += zRange.increment) + { + for (double output = outputRange.from; output <= outputRange.to; output += outputRange.increment) + { + for (double theta = thetaRange.from; theta <= thetaRange.to; theta += thetaRange.increment) + { + if (!TestConjugateProx(loss, 
theta, z, output, lower, upper)) + { + success = false; + } + } + } + } + + std::string lossName = typeid(LossFunctionType).name(); + lossName = lossName.substr(lossName.find_last_of(":") + 1); + + testing::ProcessTest("TestConjugateProx <" + lossName + ">", success); +} + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/test/include/Optimizer_test.h b/libraries/trainers/optimization/test/include/Optimizer_test.h index 66669a468..df1413810 100644 --- a/libraries/trainers/optimization/test/include/Optimizer_test.h +++ b/libraries/trainers/optimization/test/include/Optimizer_test.h @@ -18,4 +18,103 @@ void TestSDCARegressionConvergence(LossFunctionType lossFunction, RegularizerTyp template void TestSDCAClassificationConvergence(LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters, double biasVariance, double marginMean, double inputVariance); -#include "../tcc/Optimizer_test.tcc" +#pragma region implementation + +#include "RandomExampleSet.h" + +#include +#include +#include +#include + +#include + +#include +#include + +using namespace ell; +using namespace ell::trainers::optimization; + +// assert that the duality gap tends to zero +template +void TestSDCARegressionConvergence(LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters, double biasVariance, double inputVariance, double outputVariance) +{ + size_t count = 500; + size_t size = 17; + size_t epochs = 50; + + std::string randomSeedString = "GoodLuckMan"; + std::seed_seq seed(randomSeedString.begin(), randomSeedString.end()); + std::default_random_engine randomEngine(seed); + + // create random solution + VectorSolution solution(size); + std::normal_distribution biasDistribution(0, biasVariance); + solution.GetBias() = biasDistribution(randomEngine); + + std::uniform_int_distribution vectorDistribution(-1, 1); + solution.GetVector().Generate([&]() { return vectorDistribution(randomEngine); }); + + // create random dataset + auto examples = GetRegressionExampleSet(count, inputVariance, outputVariance, solution, randomEngine); + + // create optimizer + auto optimizer = MakeSDCAOptimizer>(examples, lossFunction, regularizer, parameters); + optimizer.PerformEpochs(epochs); + double dualityGap = optimizer.GetSolutionInfo().DualityGap(); + + // perform test + std::string lossName = typeid(LossFunctionType).name(); + lossName = lossName.substr(lossName.find_last_of(":") + 1); + std::string regularizerName = typeid(RegularizerType).name(); + regularizerName = regularizerName.substr(regularizerName.find_last_of(":") + 1); + + testing::ProcessTest("TestSDCARegressionConvergence <" + lossName + ", " + regularizerName + ">", dualityGap <= parameters.desiredDualityGap); + + //std::cout << solution.GetBias() << "; " << solution.GetVector() << std::endl; + //std::cout << optimizer.GetSolution().GetBias() << "; " << optimizer.GetSolution().GetVector() << "\t" << optimizer.GetSolutionInfo().primalObjective << std::endl; +} + +#include + +// assert that the duality gap tends to zero +template +void TestSDCAClassificationConvergence(LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters, double biasVariance, double marginMean, double inputVariance) +{ + size_t count = 500; + size_t size = 17; + size_t epochs = 50; + + std::string randomSeedString = "GoodLuckMan"; + std::seed_seq seed(randomSeedString.begin(), randomSeedString.end()); + std::default_random_engine randomEngine(seed); + + // create random 
solution + VectorSolution solution(size); + std::normal_distribution biasDistribution(0, biasVariance); + solution.GetBias() = biasDistribution(randomEngine); + + std::uniform_int_distribution vectorDistribution(-1, 1); + solution.GetVector().Generate([&]() { return vectorDistribution(randomEngine); }); + + // create random dataset + auto examples = GetClassificationExampleSet(count, marginMean, inputVariance, solution, randomEngine); + + // create optimizer + auto optimizer = MakeSDCAOptimizer>(examples, lossFunction, regularizer, parameters); + optimizer.PerformEpochs(epochs); + double dualityGap = optimizer.GetSolutionInfo().DualityGap(); + + // perform test + std::string lossName = typeid(LossFunctionType).name(); + lossName = lossName.substr(lossName.find_last_of(":") + 1); + std::string regularizerName = typeid(RegularizerType).name(); + regularizerName = regularizerName.substr(regularizerName.find_last_of(":") + 1); + + testing::ProcessTest("TestSDCAClassificationConvergence <" + lossName + ", " + regularizerName + ">", dualityGap <= parameters.desiredDualityGap); + + //std::cout << solution.GetBias() << "; " << solution.GetVector() << std::endl; + //std::cout << optimizer.GetSolution().GetBias() << "; " << optimizer.GetSolution().GetVector() << "\t" << optimizer.GetSolutionInfo().primalObjective << std::endl; +} + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/test/include/RandomExampleSet.h b/libraries/trainers/optimization/test/include/RandomExampleSet.h index b6d543925..c20276bee 100644 --- a/libraries/trainers/optimization/test/include/RandomExampleSet.h +++ b/libraries/trainers/optimization/test/include/RandomExampleSet.h @@ -30,4 +30,38 @@ std::shared_ptr> std::shared_ptr> GetRegressionExampleSet(size_t count, double inputVariance, double outputVariance, const VectorSolution& solution, std::default_random_engine& randomEngine); -#include "../tcc/RandomExampleSet.tcc" +#pragma region implementation + +using namespace ell; + +template +ExampleType GetRandomExample(size_t randomVectorSize, std::default_random_engine& randomEngine, size_t numConstantFeatures = 0) +{ + // allocate vector + math::RowVector vector(randomVectorSize + numConstantFeatures); + vector.Fill(1); + auto vectorView = vector.GetSubVector(0, randomVectorSize); + + // generate random values + std::normal_distribution normal(0, 200); + vectorView.Generate([&]() { return static_cast(normal(randomEngine)); }); + + ElementType output = randomEngine() % 2 == 0 ? 
static_cast(-1) : static_cast(1); + + using OutputType = typename ExampleType::OutputType; + return ExampleType{ std::move(vector), OutputType{ output } }; +} + +template +std::shared_ptr> GetRandomExampleSet(size_t count, size_t randomVectorSize, std::default_random_engine& randomEngine, size_t numConstantFeatures) +{ + auto exampleSet = std::make_shared>(); + exampleSet->reserve(count); + for (size_t i = 0; i < count; ++i) + { + exampleSet->push_back(GetRandomExample(randomVectorSize, randomEngine, numConstantFeatures)); + } + return exampleSet; +} + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/test/include/Solution_test.h b/libraries/trainers/optimization/test/include/Solution_test.h index a04b2d830..77e691f7c 100644 --- a/libraries/trainers/optimization/test/include/Solution_test.h +++ b/libraries/trainers/optimization/test/include/Solution_test.h @@ -16,4 +16,154 @@ void TestSolutionEquivalenceSGD(double regularizationParameter); template void TestSolutionEquivalenceSDCA(double regularizationParameter); -#include "../tcc/Solution_test.tcc" \ No newline at end of file +#pragma region implementation + +#include "RandomExampleSet.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +using namespace ell; +using namespace ell::trainers::optimization; + +template +using VectorScalarExampleType = Example, T>; + +template +using VectorRefScalarExampleType = Example, T>; + +template +using VectorVectorExampleType = Example, math::RowVector>; + +template +using VectorRefVectorRefExampleType = Example, math::ConstRowVectorReference>; + +// Run the SGD trainer with four different solution types and confirm that the result is identical +template +void TestSolutionEquivalenceSGD(double regularizationParameter) +{ + std::string randomSeedString = "54321blastoff"; + std::seed_seq seed(randomSeedString.begin(), randomSeedString.end()); + std::default_random_engine randomEngine; + + const size_t numExamples = 5; + const size_t exampleSize = 12; + + randomEngine.seed(seed); + auto examples1 = GetRandomExampleSet, VectorRefScalarExampleType>(numExamples, exampleSize, randomEngine, 1); + + randomEngine.seed(seed); + auto examples2 = GetRandomExampleSet, VectorRefScalarExampleType>(numExamples, exampleSize, randomEngine, 0); + + randomEngine.seed(seed); + auto examples3 = GetRandomExampleSet, VectorRefVectorRefExampleType>(numExamples, exampleSize, randomEngine, 1); + + randomEngine.seed(seed); + auto examples4 = GetRandomExampleSet, VectorRefVectorRefExampleType>(numExamples, exampleSize, randomEngine, 0); + + // setup four equivalent optimizers + auto optimizer1 = MakeSGDOptimizer>(examples1, LossFunctionType{}, { regularizationParameter }); + optimizer1.PerformEpochs(); + const auto& solution1 = optimizer1.GetSolution(); + const auto& vector1 = solution1.GetVector(); + + auto optimizer2 = MakeSGDOptimizer>(examples2, LossFunctionType{}, { regularizationParameter }); + optimizer2.PerformEpochs(); + const auto& solution2 = optimizer2.GetSolution(); + const auto& vector2 = solution2.GetVector(); + + auto optimizer3 = MakeSGDOptimizer>(examples3, MultivariateLoss{}, { regularizationParameter }); + optimizer3.PerformEpochs(); + const auto& solution3 = optimizer3.GetSolution(); + const auto& vector3 = solution3.GetMatrix().GetColumn(0); + + auto optimizer4 = MakeSGDOptimizer>(examples4, MultivariateLoss{}, { regularizationParameter }); + optimizer4.PerformEpochs(); + const auto& solution4 = 
optimizer4.GetSolution(); + const auto& vector4 = solution4.GetMatrix().GetColumn(0); + + double comparisonTolerance = 1.0e-7; + + std::string realName = typeid(RealType).name(); + std::string lossName = typeid(LossFunctionType).name(); + lossName = lossName.substr(lossName.find_last_of(":") + 1); + + // test if the two solutions are identical + testing::ProcessTest("TestSolutionEquivalenceSGD (v1 == v2) <" + realName + ", " + lossName + ">", vector1.GetSubVector(0, exampleSize).IsEqual(vector2, comparisonTolerance)); + testing::ProcessTest("TestSolutionEquivalenceSGD (v1.last == b2) <" + realName + ", " + lossName + ">", testing::IsEqual(vector1[exampleSize], solution2.GetBias(), comparisonTolerance)); + testing::ProcessTest("TestSolutionEquivalenceSGD (v1 == v3) <" + realName + ", " + lossName + ">", vector1.IsEqual(vector3, comparisonTolerance)); + testing::ProcessTest("TestSolutionEquivalenceSGD (v2 == v4) <" + realName + ", " + lossName + ">", vector2.IsEqual(vector4, comparisonTolerance)); + testing::ProcessTest("TestSolutionEquivalenceSGD (b2 == b4) <" + realName + ", " + lossName + ">", testing::IsEqual(solution4.GetBias()[0], solution2.GetBias(), comparisonTolerance)); +} + +// Run the SDCA trainer with four different solution types and confirm that the result is identical +template +void TestSolutionEquivalenceSDCA(double regularizationParameter) +{ + std::string randomSeedString = "54321blastoff"; + std::seed_seq seed(randomSeedString.begin(), randomSeedString.end()); + std::default_random_engine randomEngine; + + const size_t numExamples = 5; + const size_t exampleSize = 7; + + randomEngine.seed(seed); + auto examples1 = GetRandomExampleSet, VectorRefScalarExampleType>(numExamples, exampleSize, randomEngine, 1); + + randomEngine.seed(seed); + auto examples2 = GetRandomExampleSet, VectorRefScalarExampleType>(numExamples, exampleSize, randomEngine, 0); + + randomEngine.seed(seed); + auto examples3 = GetRandomExampleSet, VectorRefVectorRefExampleType>(numExamples, exampleSize, randomEngine, 1); + + randomEngine.seed(seed); + auto examples4 = GetRandomExampleSet, VectorRefVectorRefExampleType>(numExamples, exampleSize, randomEngine, 0); + + // setup four equivalent optimizers + auto optimizer1 = MakeSDCAOptimizer>(examples1, LossFunctionType{}, RegularizerType{}, { regularizationParameter }); + optimizer1.PerformEpochs(); + const auto& solution1 = optimizer1.GetSolution(); + const auto& vector1 = solution1.GetVector(); + + auto optimizer2 = MakeSDCAOptimizer>(examples2, LossFunctionType{}, RegularizerType{}, { regularizationParameter }); + optimizer2.PerformEpochs(); + const auto& solution2 = optimizer2.GetSolution(); + const auto& vector2 = solution2.GetVector(); + + auto optimizer3 = MakeSDCAOptimizer>(examples3, MultivariateLoss{}, RegularizerType{}, { regularizationParameter }); + optimizer3.PerformEpochs(); + const auto& solution3 = optimizer3.GetSolution(); + const auto& vector3 = solution3.GetMatrix().GetColumn(0); + + auto optimizer4 = MakeSDCAOptimizer>(examples4, MultivariateLoss{}, RegularizerType{}, { regularizationParameter }); + optimizer4.PerformEpochs(); + const auto& solution4 = optimizer4.GetSolution(); + const auto& vector4 = solution4.GetMatrix().GetColumn(0); + + double comparisonTolerance = 1.0e-6; + + std::string realName = typeid(RealType).name(); + std::string lossName = typeid(LossFunctionType).name(); + lossName = lossName.substr(lossName.find_last_of(":") + 1); + + // test if the two solutions are identical + 
testing::ProcessTest("TestSolutionEquivalenceSDCA (v1 == v2) <" + realName + ", " + lossName + ">", vector1.GetSubVector(0, exampleSize).IsEqual(vector2, comparisonTolerance)); + testing::ProcessTest("TestSolutionEquivalenceSDCA (v1.last == b2) <" + realName + ", " + lossName + ">", testing::IsEqual(vector1[exampleSize], solution2.GetBias(), comparisonTolerance)); + testing::ProcessTest("TestSolutionEquivalenceSDCA (v1 == v3) <" + realName + ", " + lossName + ">", vector1.IsEqual(vector3, comparisonTolerance)); + testing::ProcessTest("TestSolutionEquivalenceSDCA (v2 == v4) <" + realName + ", " + lossName + ">", vector2.IsEqual(vector4, comparisonTolerance)); + testing::ProcessTest("TestSolutionEquivalenceSDCA (b2 == b4) <" + realName + ", " + lossName + ">", testing::IsEqual(solution4.GetBias()[0], solution2.GetBias(), comparisonTolerance)); +}; + +#pragma endregion implementation diff --git a/libraries/trainers/optimization/test/tcc/LossFunction_test.tcc b/libraries/trainers/optimization/test/tcc/LossFunction_test.tcc deleted file mode 100644 index b1158c03d..000000000 --- a/libraries/trainers/optimization/test/tcc/LossFunction_test.tcc +++ /dev/null @@ -1,135 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: LossFunction_test.tcc (optimization_test) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include - -#include - -#include -#include - -using namespace ell; -using namespace ell::trainers::optimization; - -template -double TestDerivative(LossFunctionType loss, double prediction, double output) -{ - const double epsilon = 1.0e-6; - double lossPlus = loss.Value(prediction + epsilon, output); - double lossMinus = loss.Value(prediction - epsilon, output); - double difference = lossPlus - lossMinus; - double limit = difference / (2 * epsilon); - double derivative = loss.Derivative(prediction, output); - double error = std::abs(derivative - limit); - - return error; -} - -template -void TestDerivative(LossFunctionType loss, Range predictionRange, Range outputRange) -{ - double errorTolerance = 1.0e-6; - double maxError = 0; - - for (double prediction = predictionRange.from; prediction <= predictionRange.to; prediction += predictionRange.increment) - { - for (double output = outputRange.from; output <= outputRange.to; output += outputRange.increment) - { - maxError = std::max(maxError, TestDerivative(loss, prediction, output)); - } - } - - std::string lossName = typeid(LossFunctionType).name(); - lossName = lossName.substr(lossName.find_last_of(":") + 1); - - testing::ProcessTest("TestDerivative <" + lossName + ">", maxError < errorTolerance); -} - -template -bool TestConjugate(LossFunctionType loss, double v, double output, double lower, double upper) -{ - const double tolerance = 1.0e-6; - - double conjugate = loss.Conjugate(v, output); - if (std::isinf(conjugate)) - { - return true; - } - - auto objective = [&](double x) { return conjugate - x * v + loss.Value(x, output); }; - auto minimizer = GoldenSectionMinimizer(objective, lower, upper); - minimizer.MinimizeToPrecision(tolerance); - if (minimizer.GetMinUpperBound() < tolerance && minimizer.GetMinLowerBound() > -tolerance) - { - return true; - } - return false; -} - -template -void TestConjugate(LossFunctionType loss, Range vRange, Range outputRange, double lower, double upper) -{ - bool success = true; - for (double v = vRange.from; 
v <= vRange.to; v += vRange.increment) - { - for (double output = outputRange.from; output <= outputRange.to; output += outputRange.increment) - { - if (!TestConjugate(loss, v, output, lower, upper)) - { - success = false; - } - } - } - - std::string lossName = typeid(LossFunctionType).name(); - lossName = lossName.substr(lossName.find_last_of(":") + 1); - - testing::ProcessTest("TestConjugate <" + lossName + ">", success); -} - -template -bool TestConjugateProx(LossFunctionType loss, double theta, double z, double output, double lower, double upper) -{ - const double tolerance = 1.0e-6; - - double conjugateProx = loss.ConjugateProx(theta, z, output); - double conjugateProxValue = theta * loss.Conjugate(conjugateProx, output) + 0.5 * (conjugateProx - z) * (conjugateProx - z); - auto objective = [&](double x) { return theta * loss.Conjugate(x, output) + 0.5 * (x - z) * (x - z) - conjugateProxValue; }; - - auto minimizer = GoldenSectionMinimizer(objective, lower, upper); - minimizer.MinimizeToPrecision(tolerance); - if (minimizer.GetMinUpperBound() < tolerance && minimizer.GetMinLowerBound() > -tolerance) - { - return true; - } - return false; -} - -template -void TestConjugateProx(LossFunctionType loss, Range thetaRange, Range zRange, Range outputRange, double lower, double upper) -{ - bool success = true; - for (double z = zRange.from; z <= zRange.to; z += zRange.increment) - { - for (double output = outputRange.from; output <= outputRange.to; output += outputRange.increment) - { - for (double theta = thetaRange.from; theta <= thetaRange.to; theta += thetaRange.increment) - { - if (!TestConjugateProx(loss, theta, z, output, lower, upper)) - { - success = false; - } - } - } - } - - std::string lossName = typeid(LossFunctionType).name(); - lossName = lossName.substr(lossName.find_last_of(":") + 1); - - testing::ProcessTest("TestConjugateProx <" + lossName + ">", success); -} diff --git a/libraries/trainers/optimization/test/tcc/Optimizer_test.tcc b/libraries/trainers/optimization/test/tcc/Optimizer_test.tcc deleted file mode 100644 index a14368c0e..000000000 --- a/libraries/trainers/optimization/test/tcc/Optimizer_test.tcc +++ /dev/null @@ -1,104 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Project: Embedded Learning Library (ELL) -// File: Optimizer_test.tcc (optimization_test) -// Authors: Ofer Dekel -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#include "RandomExampleSet.h" - -#include -#include -#include -#include - -#include - -#include -#include - -using namespace ell; -using namespace ell::trainers::optimization; - -// assert that the duality gap tends to zero -template -void TestSDCARegressionConvergence(LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters, double biasVariance, double inputVariance, double outputVariance) -{ - size_t count = 500; - size_t size = 17; - size_t epochs = 50; - - std::string randomSeedString = "GoodLuckMan"; - std::seed_seq seed(randomSeedString.begin(), randomSeedString.end()); - std::default_random_engine randomEngine(seed); - - // create random solution - VectorSolution solution(size); - std::normal_distribution biasDistribution(0, biasVariance); - solution.GetBias() = biasDistribution(randomEngine); - - std::uniform_int_distribution vectorDistribution(-1, 1); - solution.GetVector().Generate([&]() { return vectorDistribution(randomEngine); }); - - // create random dataset - 
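The deleted `TestDerivative` above validates a loss function's `Derivative` against a central-difference quotient, whose error is O(epsilon^2) for smooth functions. A standalone illustration of the same check, using hypothetical `SquaredLoss*` free functions rather than the ELL loss classes:

    #include <cmath>
    #include <iostream>

    // value and derivative of the squared loss 0.5 * (p - y)^2
    double SquaredLossValue(double prediction, double output)
    {
        double residual = prediction - output;
        return 0.5 * residual * residual;
    }

    double SquaredLossDerivative(double prediction, double output)
    {
        return prediction - output;
    }

    int main()
    {
        const double epsilon = 1.0e-6;
        double prediction = 0.7, output = -1.0;

        // central difference: (f(x + eps) - f(x - eps)) / (2 * eps)
        double limit = (SquaredLossValue(prediction + epsilon, output) - SquaredLossValue(prediction - epsilon, output)) / (2 * epsilon);
        double error = std::abs(SquaredLossDerivative(prediction, output) - limit);
        std::cout << "derivative error: " << error << "\n"; // expect ~1e-10 or smaller
    }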
diff --git a/libraries/trainers/optimization/test/tcc/Optimizer_test.tcc b/libraries/trainers/optimization/test/tcc/Optimizer_test.tcc
deleted file mode 100644
index a14368c0e..000000000
--- a/libraries/trainers/optimization/test/tcc/Optimizer_test.tcc
+++ /dev/null
@@ -1,104 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     Optimizer_test.tcc (optimization_test)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include "RandomExampleSet.h"
-
-#include <trainers/optimization/include/IndexedContainer.h>
-#include <trainers/optimization/include/OptimizationExample.h>
-#include <trainers/optimization/include/SDCAOptimizer.h>
-#include <trainers/optimization/include/VectorSolution.h>
-
-#include <testing/include/testing.h>
-
-#include <random>
-#include <string>
-
-using namespace ell;
-using namespace ell::trainers::optimization;
-
-// assert that the duality gap tends to zero
-template <typename LossFunctionType, typename RegularizerType>
-void TestSDCARegressionConvergence(LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters, double biasVariance, double inputVariance, double outputVariance)
-{
-    size_t count = 500;
-    size_t size = 17;
-    size_t epochs = 50;
-
-    std::string randomSeedString = "GoodLuckMan";
-    std::seed_seq seed(randomSeedString.begin(), randomSeedString.end());
-    std::default_random_engine randomEngine(seed);
-
-    // create random solution
-    VectorSolution<double, true> solution(size);
-    std::normal_distribution<double> biasDistribution(0, biasVariance);
-    solution.GetBias() = biasDistribution(randomEngine);
-
-    std::uniform_int_distribution<int> vectorDistribution(-1, 1);
-    solution.GetVector().Generate([&]() { return vectorDistribution(randomEngine); });
-
-    // create random dataset
-    auto examples = GetRegressionExampleSet(count, inputVariance, outputVariance, solution, randomEngine);
-
-    // create optimizer
-    auto optimizer = MakeSDCAOptimizer<VectorSolution<double, true>>(examples, lossFunction, regularizer, parameters);
-    optimizer.PerformEpochs(epochs);
-    double dualityGap = optimizer.GetSolutionInfo().DualityGap();
-
-    // perform test
-    std::string lossName = typeid(LossFunctionType).name();
-    lossName = lossName.substr(lossName.find_last_of(":") + 1);
-    std::string regularizerName = typeid(RegularizerType).name();
-    regularizerName = regularizerName.substr(regularizerName.find_last_of(":") + 1);
-
-    testing::ProcessTest("TestSDCARegressionConvergence <" + lossName + ", " + regularizerName + ">", dualityGap <= parameters.desiredDualityGap);
-
-    //std::cout << solution.GetBias() << "; " << solution.GetVector() << std::endl;
-    //std::cout << optimizer.GetSolution().GetBias() << "; " << optimizer.GetSolution().GetVector() << "\t" << optimizer.GetSolutionInfo().primalObjective << std::endl;
-}
-
-#include <iostream>
-
-// assert that the duality gap tends to zero
-template <typename LossFunctionType, typename RegularizerType>
-void TestSDCAClassificationConvergence(LossFunctionType lossFunction, RegularizerType regularizer, SDCAOptimizerParameters parameters, double biasVariance, double marginMean, double inputVariance)
-{
-    size_t count = 500;
-    size_t size = 17;
-    size_t epochs = 50;
-
-    std::string randomSeedString = "GoodLuckMan";
-    std::seed_seq seed(randomSeedString.begin(), randomSeedString.end());
-    std::default_random_engine randomEngine(seed);
-
-    // create random solution
-    VectorSolution<double, true> solution(size);
-    std::normal_distribution<double> biasDistribution(0, biasVariance);
-    solution.GetBias() = biasDistribution(randomEngine);
-
-    std::uniform_int_distribution<int> vectorDistribution(-1, 1);
-    solution.GetVector().Generate([&]() { return vectorDistribution(randomEngine); });
-
-    // create random dataset
-    auto examples = GetClassificationExampleSet(count, marginMean, inputVariance, solution, randomEngine);
-
-    // create optimizer
-    auto optimizer = MakeSDCAOptimizer<VectorSolution<double, true>>(examples, lossFunction, regularizer, parameters);
-    optimizer.PerformEpochs(epochs);
-    double dualityGap = optimizer.GetSolutionInfo().DualityGap();
-
-    // perform test
-    std::string lossName = typeid(LossFunctionType).name();
-    lossName = lossName.substr(lossName.find_last_of(":") + 1);
-    std::string regularizerName = typeid(RegularizerType).name();
-    regularizerName = regularizerName.substr(regularizerName.find_last_of(":") + 1);
-
-    testing::ProcessTest("TestSDCAClassificationConvergence <" + lossName + ", " + regularizerName + ">", dualityGap <= parameters.desiredDualityGap);
-
-    //std::cout << solution.GetBias() << "; " << solution.GetVector() << std::endl;
-    //std::cout << optimizer.GetSolution().GetBias() << "; " << optimizer.GetSolution().GetVector() << "\t" << optimizer.GetSolutionInfo().primalObjective << std::endl;
-}
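These convergence tests, like the equivalence tests above, seed their random engine from a fixed string via `std::seed_seq`, so every run draws the identical synthetic dataset and failures are reproducible. A self-contained sketch of that seeding idiom:

    #include <iostream>
    #include <random>
    #include <string>

    int main()
    {
        // seeding two engines from the same string yields identical streams,
        // which is how the deleted tests make their random datasets repeatable
        std::string randomSeedString = "54321blastoff";
        std::seed_seq seed(randomSeedString.begin(), randomSeedString.end());

        std::default_random_engine engine1(seed);
        std::default_random_engine engine2(seed);
        std::cout << (engine1() == engine2()) << "\n"; // prints 1
    }

The same `seed_seq` can also re-seed a single engine repeatedly (`randomEngine.seed(seed)`), which is how the tests above regenerate the same examples four times in a row.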
diff --git a/libraries/trainers/optimization/test/tcc/RandomExampleSet.tcc b/libraries/trainers/optimization/test/tcc/RandomExampleSet.tcc
deleted file mode 100644
index 0bbbd7598..000000000
--- a/libraries/trainers/optimization/test/tcc/RandomExampleSet.tcc
+++ /dev/null
@@ -1,39 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     RandomExampleSet.tcc (optimization_test)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-using namespace ell;
-
-template <typename ElementType, typename ExampleType>
-ExampleType GetRandomExample(size_t randomVectorSize, std::default_random_engine& randomEngine, size_t numConstantFeatures = 0)
-{
-    // allocate vector
-    math::RowVector<ElementType> vector(randomVectorSize + numConstantFeatures);
-    vector.Fill(1);
-    auto vectorView = vector.GetSubVector(0, randomVectorSize);
-
-    // generate random values
-    std::normal_distribution<double> normal(0, 200);
-    vectorView.Generate([&]() { return static_cast<ElementType>(normal(randomEngine)); });
-
-    ElementType output = randomEngine() % 2 == 0 ? static_cast<ElementType>(-1) : static_cast<ElementType>(1);
-
-    using OutputType = typename ExampleType::OutputType;
-    return ExampleType{ std::move(vector), OutputType{ output } };
-}
-
-template <typename ElementType, typename VectorExampleType, typename ExampleType>
-std::shared_ptr<VectorIndexedContainer<VectorExampleType, ExampleType>> GetRandomExampleSet(size_t count, size_t randomVectorSize, std::default_random_engine& randomEngine, size_t numConstantFeatures)
-{
-    auto exampleSet = std::make_shared<VectorIndexedContainer<VectorExampleType, ExampleType>>();
-    exampleSet->reserve(count);
-    for (size_t i = 0; i < count; ++i)
-    {
-        exampleSet->push_back(GetRandomExample<ElementType, VectorExampleType>(randomVectorSize, randomEngine, numConstantFeatures));
-    }
-    return exampleSet;
-}
diff --git a/libraries/trainers/optimization/test/tcc/Solution_test.tcc b/libraries/trainers/optimization/test/tcc/Solution_test.tcc
deleted file mode 100644
index 95f55256b..000000000
--- a/libraries/trainers/optimization/test/tcc/Solution_test.tcc
+++ /dev/null
@@ -1,155 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     Solution_test.tcc (optimization_test)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include "RandomExampleSet.h"
-
-#include <trainers/optimization/include/IndexedContainer.h>
-#include <trainers/optimization/include/MatrixSolution.h>
-#include <trainers/optimization/include/MultivariateLoss.h>
-#include <trainers/optimization/include/OptimizationExample.h>
-#include <trainers/optimization/include/SDCAOptimizer.h>
-#include <trainers/optimization/include/SGDOptimizer.h>
-#include <trainers/optimization/include/VectorSolution.h>
-
-#include <math/include/Vector.h>
-
-#include <testing/include/testing.h>
-
-#include <memory>
-#include <random>
-
-using namespace ell;
-using namespace ell::trainers::optimization;
-
-template <typename T>
-using VectorScalarExampleType = Example<math::RowVector<T>, T>;
-
-template <typename T>
-using VectorRefScalarExampleType = Example<math::ConstRowVectorReference<T>, T>;
-
-template <typename T>
-using VectorVectorExampleType = Example<math::RowVector<T>, math::RowVector<T>>;
-
-template <typename T>
-using VectorRefVectorRefExampleType = Example<math::ConstRowVectorReference<T>, math::ConstRowVectorReference<T>>;
-
-// Run the SGD trainer with four different solution types and confirm that the result is identical
-template <typename RealType, typename LossFunctionType>
-void TestSolutionEquivalenceSGD(double regularizationParameter)
-{
-    std::string randomSeedString = "54321blastoff";
-    std::seed_seq seed(randomSeedString.begin(), randomSeedString.end());
-    std::default_random_engine randomEngine;
-
-    const size_t numExamples = 5;
-    const size_t exampleSize = 12;
-
-    randomEngine.seed(seed);
-    auto examples1 = GetRandomExampleSet<RealType, VectorScalarExampleType<RealType>, VectorRefScalarExampleType<RealType>>(numExamples, exampleSize, randomEngine, 1);
-
-    randomEngine.seed(seed);
-    auto examples2 = GetRandomExampleSet<RealType, VectorScalarExampleType<RealType>, VectorRefScalarExampleType<RealType>>(numExamples, exampleSize, randomEngine, 0);
-
-    randomEngine.seed(seed);
-    auto examples3 = GetRandomExampleSet<RealType, VectorVectorExampleType<RealType>, VectorRefVectorRefExampleType<RealType>>(numExamples, exampleSize, randomEngine, 1);
-
-    randomEngine.seed(seed);
-    auto examples4 = GetRandomExampleSet<RealType, VectorVectorExampleType<RealType>, VectorRefVectorRefExampleType<RealType>>(numExamples, exampleSize, randomEngine, 0);
-
-    // setup four equivalent optimizers
-    auto optimizer1 = MakeSGDOptimizer<VectorSolution<RealType>>(examples1, LossFunctionType{}, { regularizationParameter });
-    optimizer1.PerformEpochs();
-    const auto& solution1 = optimizer1.GetSolution();
-    const auto& vector1 = solution1.GetVector();
-
-    auto optimizer2 = MakeSGDOptimizer<VectorSolution<RealType, true>>(examples2, LossFunctionType{}, { regularizationParameter });
-    optimizer2.PerformEpochs();
-    const auto& solution2 = optimizer2.GetSolution();
-    const auto& vector2 = solution2.GetVector();
-
-    auto optimizer3 = MakeSGDOptimizer<MatrixSolution<RealType>>(examples3, MultivariateLoss<LossFunctionType>{}, { regularizationParameter });
-    optimizer3.PerformEpochs();
-    const auto& solution3 = optimizer3.GetSolution();
-    const auto& vector3 = solution3.GetMatrix().GetColumn(0);
-
-    auto optimizer4 = MakeSGDOptimizer<MatrixSolution<RealType, true>>(examples4, MultivariateLoss<LossFunctionType>{}, { regularizationParameter });
-    optimizer4.PerformEpochs();
-    const auto& solution4 = optimizer4.GetSolution();
-    const auto& vector4 = solution4.GetMatrix().GetColumn(0);
-
-    double comparisonTolerance = 1.0e-7;
-
-    std::string realName = typeid(RealType).name();
-    std::string lossName = typeid(LossFunctionType).name();
-    lossName = lossName.substr(lossName.find_last_of(":") + 1);
-
-    // test if the two solutions are identical
-    testing::ProcessTest("TestSolutionEquivalenceSGD (v1 == v2) <" + realName + ", " + lossName + ">", vector1.GetSubVector(0, exampleSize).IsEqual(vector2, comparisonTolerance));
-    testing::ProcessTest("TestSolutionEquivalenceSGD (v1.last == b2) <" + realName + ", " + lossName + ">", testing::IsEqual(vector1[exampleSize], solution2.GetBias(), comparisonTolerance));
-    testing::ProcessTest("TestSolutionEquivalenceSGD (v1 == v3) <" + realName + ", " + lossName + ">", vector1.IsEqual(vector3, comparisonTolerance));
-    testing::ProcessTest("TestSolutionEquivalenceSGD (v2 == v4) <" + realName + ", " + lossName + ">", vector2.IsEqual(vector4, comparisonTolerance));
-    testing::ProcessTest("TestSolutionEquivalenceSGD (b2 == b4) <" + realName + ", " + lossName + ">", testing::IsEqual(solution4.GetBias()[0], solution2.GetBias(), comparisonTolerance));
-}
-
-// Run the SDCA trainer with four different solution types and confirm that the result is identical
-template <typename RealType, typename LossFunctionType, typename RegularizerType>
-void TestSolutionEquivalenceSDCA(double regularizationParameter)
-{
-    std::string randomSeedString = "54321blastoff";
-    std::seed_seq seed(randomSeedString.begin(), randomSeedString.end());
-    std::default_random_engine randomEngine;
-
-    const size_t numExamples = 5;
-    const size_t exampleSize = 7;
-
-    randomEngine.seed(seed);
-    auto examples1 = GetRandomExampleSet<RealType, VectorScalarExampleType<RealType>, VectorRefScalarExampleType<RealType>>(numExamples, exampleSize, randomEngine, 1);
-
-    randomEngine.seed(seed);
-    auto examples2 = GetRandomExampleSet<RealType, VectorScalarExampleType<RealType>, VectorRefScalarExampleType<RealType>>(numExamples, exampleSize, randomEngine, 0);
-
-    randomEngine.seed(seed);
-    auto examples3 = GetRandomExampleSet<RealType, VectorVectorExampleType<RealType>, VectorRefVectorRefExampleType<RealType>>(numExamples, exampleSize, randomEngine, 1);
-
-    randomEngine.seed(seed);
-    auto examples4 = GetRandomExampleSet<RealType, VectorVectorExampleType<RealType>, VectorRefVectorRefExampleType<RealType>>(numExamples, exampleSize, randomEngine, 0);
-
-    // setup four equivalent optimizers
-    auto optimizer1 = MakeSDCAOptimizer<VectorSolution<RealType>>(examples1, LossFunctionType{}, RegularizerType{}, { regularizationParameter });
-    optimizer1.PerformEpochs();
-    const auto& solution1 = optimizer1.GetSolution();
-    const auto& vector1 = solution1.GetVector();
-
-    auto optimizer2 = MakeSDCAOptimizer<VectorSolution<RealType, true>>(examples2, LossFunctionType{}, RegularizerType{}, { regularizationParameter });
-    optimizer2.PerformEpochs();
-    const auto& solution2 = optimizer2.GetSolution();
-    const auto& vector2 = solution2.GetVector();
-
-    auto optimizer3 = MakeSDCAOptimizer<MatrixSolution<RealType>>(examples3, MultivariateLoss<LossFunctionType>{}, RegularizerType{}, { regularizationParameter });
-    optimizer3.PerformEpochs();
-    const auto& solution3 = optimizer3.GetSolution();
-    const auto& vector3 = solution3.GetMatrix().GetColumn(0);
-
-    auto optimizer4 = MakeSDCAOptimizer<MatrixSolution<RealType, true>>(examples4, MultivariateLoss<LossFunctionType>{}, RegularizerType{}, { regularizationParameter });
-    optimizer4.PerformEpochs();
-    const auto& solution4 = optimizer4.GetSolution();
-    const auto& vector4 = solution4.GetMatrix().GetColumn(0);
-
-    double comparisonTolerance = 1.0e-6;
-
-    std::string realName = typeid(RealType).name();
-    std::string lossName = typeid(LossFunctionType).name();
-    lossName = lossName.substr(lossName.find_last_of(":") + 1);
-
-    // test if the two solutions are identical
-    testing::ProcessTest("TestSolutionEquivalenceSDCA (v1 == v2) <" + realName + ", " + lossName + ">", vector1.GetSubVector(0, exampleSize).IsEqual(vector2, comparisonTolerance));
-    testing::ProcessTest("TestSolutionEquivalenceSDCA (v1.last == b2) <" + realName + ", " + lossName + ">", testing::IsEqual(vector1[exampleSize], solution2.GetBias(), comparisonTolerance));
-    testing::ProcessTest("TestSolutionEquivalenceSDCA (v1 == v3) <" + realName + ", " + lossName + ">", vector1.IsEqual(vector3, comparisonTolerance));
-    testing::ProcessTest("TestSolutionEquivalenceSDCA (v2 == v4) <" + realName + ", " + lossName + ">", vector2.IsEqual(vector4, comparisonTolerance));
-    testing::ProcessTest("TestSolutionEquivalenceSDCA (b2 == b4) <" + realName + ", " + lossName + ">", testing::IsEqual(solution4.GetBias()[0], solution2.GetBias(), comparisonTolerance));
-};
diff --git a/libraries/trainers/tcc/EvaluatingTrainer.tcc b/libraries/trainers/tcc/EvaluatingTrainer.tcc
deleted file mode 100644
index f84596d29..000000000
--- a/libraries/trainers/tcc/EvaluatingTrainer.tcc
+++ /dev/null
@@ -1,47 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     EvaluatingTrainer.tcc (trainers)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <cassert>
-
-namespace ell
-{
-namespace trainers
-{
-    template <typename PredictorType>
-    EvaluatingTrainer<PredictorType>::EvaluatingTrainer(
-        std::unique_ptr<InternalTrainerType>&& internalTrainer,
-        std::shared_ptr<EvaluatorType> evaluator) :
-        _internalTrainer(std::move(internalTrainer)),
-        _evaluator(evaluator)
-    {
-        assert(_internalTrainer != nullptr);
-        assert(_evaluator != nullptr);
-    }
-
-    template <typename PredictorType>
-    void EvaluatingTrainer<PredictorType>::SetDataset(const data::AnyDataset& anyDataset)
-    {
-        _internalTrainer->SetDataset(anyDataset);
-    }
-
-    template <typename PredictorType>
-    void EvaluatingTrainer<PredictorType>::Update()
-    {
-        _internalTrainer->Update();
-        _evaluator->Evaluate(_internalTrainer->GetPredictor());
-    }
-
-    template <typename PredictorType>
-    EvaluatingTrainer<PredictorType> MakeEvaluatingTrainer(
-        std::unique_ptr<ITrainer<PredictorType>>&& internalTrainer,
-        std::shared_ptr<evaluators::IEvaluator<PredictorType>> evaluator)
-    {
-        return EvaluatingTrainer<PredictorType>(std::move(internalTrainer), evaluator);
-    }
-} // namespace trainers
-} // namespace ell
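The deleted `EvaluatingTrainer` is a decorator: it forwards `SetDataset`/`Update` to a wrapped trainer and runs an evaluator after every update. A minimal stand-alone sketch of that shape, with hypothetical `Trainer`/`CountingTrainer` types standing in for ELL's `ITrainer` and `IEvaluator` (a `double` stands in for the predictor):

    #include <cassert>
    #include <functional>
    #include <iostream>
    #include <memory>
    #include <utility>

    struct Trainer
    {
        virtual ~Trainer() = default;
        virtual void Update() = 0;
        virtual double GetPredictor() const = 0;
    };

    struct CountingTrainer : Trainer
    {
        void Update() override { ++updates; }
        double GetPredictor() const override { return updates * 0.5; }
        int updates = 0;
    };

    // decorator: same interface as the wrapped trainer, plus evaluation on every update
    struct EvaluatingTrainerSketch : Trainer
    {
        EvaluatingTrainerSketch(std::unique_ptr<Trainer> inner, std::function<void(double)> evaluate) :
            _inner(std::move(inner)), _evaluate(std::move(evaluate)) { assert(_inner != nullptr); }

        void Update() override
        {
            _inner->Update();
            _evaluate(_inner->GetPredictor()); // evaluate after each update, as above
        }
        double GetPredictor() const override { return _inner->GetPredictor(); }

    private:
        std::unique_ptr<Trainer> _inner;
        std::function<void(double)> _evaluate;
    };

    int main()
    {
        EvaluatingTrainerSketch trainer(std::make_unique<CountingTrainer>(),
                                        [](double p) { std::cout << "evaluated: " << p << "\n"; });
        trainer.Update();
        trainer.Update();
    }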
diff --git a/libraries/trainers/tcc/ForestTrainer.tcc b/libraries/trainers/tcc/ForestTrainer.tcc
deleted file mode 100644
index 340739cf9..000000000
--- a/libraries/trainers/tcc/ForestTrainer.tcc
+++ /dev/null
@@ -1,229 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     ForestTrainer.tcc (trainers)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-//#define VERBOSE_MODE( x ) x // uncomment this for very verbose mode
-#define VERBOSE_MODE(x) // uncomment this for nonverbose mode
-
-namespace ell
-{
-namespace trainers
-{
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::ForestTrainer(const BoosterType& booster, const ForestTrainerParameters& parameters) :
-        _booster(booster),
-        _parameters(parameters),
-        _forest()
-    {
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SetDataset(const data::AnyDataset& anyDataset)
-    {
-        // materialize a dataset of dense DataVectors with metadata that contains both the strong and weak weight and labels for each example
-        _dataset = data::Dataset<ForestTrainerExample>(anyDataset);
-
-        // initializes the special fields in the dataset metadata: weak weight and label, currentOutput
-        for (size_t rowIndex = 0; rowIndex < _dataset.NumExamples(); ++rowIndex)
-        {
-            auto& example = _dataset[rowIndex];
-            auto prediction = _forest.Predict(example.GetDataVector());
-            auto& metadata = example.GetMetadata();
-            metadata.currentOutput = prediction;
-            metadata.weak = _booster.GetWeakWeightLabel(metadata.strong, prediction);
-        }
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::Update()
-    {
-        // boosting loop (outer loop)
-        for (size_t round = 0; round < _parameters.numRounds; ++round)
-        {
-            // call the booster and compute sums for the entire data set
-            Sums sums = SetWeakWeightsLabels();
-
-            // use the computed sums to calculate the bias term, set it in the forest and the data set
-            double bias = sums.GetMeanLabel();
-            _forest.AddToBias(bias);
-            UpdateCurrentOutputs(bias);
-
-            VERBOSE_MODE(_dataset.Print(std::cout));
-            VERBOSE_MODE(std::cout << "\nBoosting iteration\n");
-            VERBOSE_MODE(_forest.PrintLine(std::cout, 1));
-
-            // find split candidate for root node and push it onto the priority queue
-            auto rootSplit = GetBestSplitRuleAtNode(_forest.GetNewRootId(), Range{ 0, _dataset.NumExamples() }, sums);
-
-            // check for positive gain
-            if (rootSplit.gain < _parameters.minSplitGain || _parameters.maxSplitsPerRound == 0)
-            {
-                return;
-            }
-
-            // reset the queue and add the root split from the graph
-            if (_queue.size() > 0)
-            {
-                _queue = SplitCandidatePriorityQueue();
-            }
-            _queue.push(std::move(rootSplit));
-
-            // start performing splits until the maximum is reached or the queue is empty
-            PerformSplits(_parameters.maxSplitsPerRound);
-        }
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SplitCandidate::SplitCandidate(SplittableNodeId nodeId, Range totalRange, Sums totalSums) :
-        gain(0),
-        nodeId(nodeId),
-        stats(totalSums),
-        ranges(totalRange)
-    {
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    auto ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SetWeakWeightsLabels() -> Sums
-    {
-        Sums sums;
-
-        for (size_t rowIndex = 0; rowIndex < _dataset.NumExamples(); ++rowIndex)
-        {
-            auto& metadata = _dataset[rowIndex].GetMetadata();
-            metadata.weak = _booster.GetWeakWeightLabel(metadata.strong, metadata.currentOutput);
-            sums.Increment(metadata.weak);
-        }
-
-        if (sums.sumWeights == 0.0)
-        {
-            throw utilities::InputException(utilities::InputExceptionErrors::badData, "sum of weights in data is zero");
-        }
-
-        return sums;
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::UpdateCurrentOutputs(double value)
-    {
-        for (size_t rowIndex = 0; rowIndex < _dataset.NumExamples(); ++rowIndex)
-        {
-            auto& example = _dataset[rowIndex];
-            example.GetMetadata().currentOutput += value;
-        }
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::UpdateCurrentOutputs(Range range, const EdgePredictorType& edgePredictor)
-    {
-        for (size_t rowIndex = range.firstIndex; rowIndex < range.firstIndex + range.size; ++rowIndex)
-        {
-            auto& example = _dataset[rowIndex];
-            example.GetMetadata().currentOutput += edgePredictor.Predict(example.GetDataVector());
-        }
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::PerformSplits(size_t maxSplits)
-    {
-        // count splits
-        size_t splitCount = 0;
-
-        // splitting loop (inner loop)
-        while (!_queue.empty())
-        {
-            VERBOSE_MODE(std::cout << "\nSplit iteration\n");
-            VERBOSE_MODE(_queue.PrintLine(std::cout, 1));
-
-            auto splitCandidate = _queue.top();
-            _queue.pop();
-
-            const auto& stats = splitCandidate.stats;
-            const auto& ranges = splitCandidate.ranges;
-
-            // sort the data according to the performed split and update the metadata to reflect this change
-            SortNodeDataset(ranges.GetTotalRange(), splitCandidate.splitRule);
-
-            // update current output field in metadata
-            auto edgePredictors = GetEdgePredictors(stats);
-            for (size_t i = 0; i < splitCandidate.splitRule.NumOutputs(); ++i)
-            {
-                UpdateCurrentOutputs(ranges.GetChildRange(i), edgePredictors[i]);
-            }
-
-            // have the forest perform the split
-            using SplitAction = predictors::SimpleForestPredictor::SplitAction;
-            SplitAction splitAction(splitCandidate.nodeId, splitCandidate.splitRule, edgePredictors);
-            auto interiorNodeIndex = _forest.Split(splitAction);
-
-            VERBOSE_MODE(_dataset.Print(std::cout, 1));
-            VERBOSE_MODE(std::cout << "\n");
-            VERBOSE_MODE(_forest.PrintLine(std::cout, 1));
-
-            // if max number of splits reached, exit the loop
-            if (++splitCount >= maxSplits)
-            {
-                break;
-            }
-
-            // queue new split candidates
-            for (size_t i = 0; i < splitCandidate.splitRule.NumOutputs(); ++i)
-            {
-                auto childSplitCandidate = GetBestSplitRuleAtNode(_forest.GetChildId(interiorNodeIndex, i), ranges.GetChildRange(i), stats.GetChildSums(i));
-                if (childSplitCandidate.gain > _parameters.minSplitGain)
-                {
-                    _queue.push(std::move(childSplitCandidate));
-                }
-            }
-        }
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SortNodeDataset(Range range, const SplitRuleType& splitRule)
-    {
-        if (splitRule.NumOutputs() == 2)
-        {
-            _dataset.Partition([splitRule](const data::Example<DataVectorType, TrainerMetadata>& example) { return splitRule.Predict(example.GetDataVector()) == 0; },
-                               range.firstIndex,
-                               range.size);
-        }
-        else
-        {
-            _dataset.Sort([splitRule](const data::Example<DataVectorType, TrainerMetadata>& example) { return splitRule.Predict(example.GetDataVector()); },
-                          range.firstIndex,
-                          range.size);
-        }
-    }
-
-    //
-    // debugging code
-    //
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SplitCandidatePriorityQueue::PrintLine(std::ostream& os, size_t tabs) const
-    {
-        os << std::string(tabs * 4, ' ') << "Priority Queue Size: " << size() << "\n";
-
-        for (const auto& candidate : std::priority_queue<SplitCandidate>::c) // c is a protected member of std::priority_queue
-        {
-            os << "\n";
-            candidate.PrintLine(os, tabs + 1);
-            os << "\n";
-        }
-    }
-
-    template <typename SplitRuleType, typename EdgePredictorType, typename BoosterType>
-    void ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>::SplitCandidate::PrintLine(std::ostream& os, size_t tabs) const
-    {
-        os << std::string(tabs * 4, ' ') << "gain = " << gain << "\n";
-        os << std::string(tabs * 4, ' ') << "node = ";
-        nodeId.Print(os);
-        os << "\n";
-        splitRule.PrintLine(os, tabs);
-        stats.PrintLine(os, tabs);
-    }
-} // namespace trainers
-} // namespace ell
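`PerformSplits` above grows the tree best-first: candidate splits live in a max-heap keyed by gain, the best one is performed, and its children's candidates are pushed back until `maxSplits` is reached or the queue empties. A minimal sketch of that loop using only the standard library (the `SplitCandidate` here is a hypothetical two-field stand-in):

    #include <cstddef>
    #include <iostream>
    #include <queue>

    struct SplitCandidate
    {
        double gain;
        int nodeId;
        // std::priority_queue is a max-heap, so ordering by gain pops the best split first
        bool operator<(const SplitCandidate& other) const { return gain < other.gain; }
    };

    int main()
    {
        std::priority_queue<SplitCandidate> queue;
        queue.push({ 0.9, 0 });
        queue.push({ 0.2, 1 });
        queue.push({ 0.5, 2 });

        const size_t maxSplits = 2;
        for (size_t splitCount = 0; splitCount < maxSplits && !queue.empty(); ++splitCount)
        {
            auto candidate = queue.top();
            queue.pop();
            std::cout << "split node " << candidate.nodeId << " (gain " << candidate.gain << ")\n";
            // a real trainer would compute and push the children's best candidates here
        }
    }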
diff --git a/libraries/trainers/tcc/HistogramForestTrainer.tcc b/libraries/trainers/tcc/HistogramForestTrainer.tcc
deleted file mode 100644
index 49cc06b7d..000000000
--- a/libraries/trainers/tcc/HistogramForestTrainer.tcc
+++ /dev/null
@@ -1,116 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     HistogramForestTrainer.tcc (trainers)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <tuple>
-
-namespace ell
-{
-namespace trainers
-{
-    template <typename LossFunctionType, typename BoosterType, typename ThresholdFinderType>
-    HistogramForestTrainer<LossFunctionType, BoosterType, ThresholdFinderType>::HistogramForestTrainer(const LossFunctionType& lossFunction, const BoosterType& booster, const ThresholdFinderType& thresholdFinder, const HistogramForestTrainerParameters& parameters) :
-        ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>(booster, parameters),
-        _lossFunction(lossFunction),
-        _thresholdFinder(thresholdFinder),
-        _random(utilities::GetRandomEngine(parameters.randomSeed)),
-        _thresholdFinderSampleSize(parameters.thresholdFinderSampleSize),
-        _candidatesPerInput(parameters.candidatesPerInput)
-    {
-    }
-
-    template <typename LossFunctionType, typename BoosterType, typename ThresholdFinderType>
-    auto HistogramForestTrainer<LossFunctionType, BoosterType, ThresholdFinderType>::GetBestSplitRuleAtNode(SplittableNodeId nodeId, Range range, Sums sums) -> SplitCandidate
-    {
-        SplitCandidate bestSplitCandidate(nodeId, range, sums);
-
-        auto splitRuleCandidates = CallThresholdFinder(range);
-
-        for (const auto& splitRuleCandidate : splitRuleCandidates)
-        {
-            Sums sums0;
-            size_t size0;
-
-            std::tie(sums0, size0) = EvaluateSplitRule(splitRuleCandidate, range);
-
-            Sums sums1 = sums - sums0;
-            double gain = CalculateGain(sums, sums0, sums1);
-
-            // find gain maximizer
-            if (gain > bestSplitCandidate.gain)
-            {
-                bestSplitCandidate.gain = gain;
-                bestSplitCandidate.splitRule = splitRuleCandidate;
-                bestSplitCandidate.ranges.SplitChildRange(0, size0);
-                bestSplitCandidate.stats.SetChildSums({ sums0, sums1 });
-            }
-        }
-
-        return bestSplitCandidate;
-    }
-
-    template <typename LossFunctionType, typename BoosterType, typename ThresholdFinderType>
-    auto HistogramForestTrainer<LossFunctionType, BoosterType, ThresholdFinderType>::GetEdgePredictors(const NodeStats& nodeStats) -> std::vector<EdgePredictorType>
-    {
-        double output = nodeStats.GetTotalSums().GetMeanLabel();
-        double output0 = nodeStats.GetChildSums(0).GetMeanLabel() - output;
-        double output1 = nodeStats.GetChildSums(1).GetMeanLabel() - output;
-        return std::vector<EdgePredictorType>{ output0, output1 };
-    }
-
-    template <typename LossFunctionType, typename BoosterType, typename ThresholdFinderType>
-    double HistogramForestTrainer<LossFunctionType, BoosterType, ThresholdFinderType>::CalculateGain(const Sums& sums, const Sums& sums0, const Sums& sums1) const
-    {
-        if (sums0.sumWeights == 0 || sums1.sumWeights == 0)
-        {
-            return 0;
-        }
-
-        return sums0.sumWeights * _lossFunction.BregmanGenerator(sums0.sumWeightedLabels / sums0.sumWeights) +
-               sums1.sumWeights * _lossFunction.BregmanGenerator(sums1.sumWeightedLabels / sums1.sumWeights) -
-               sums.sumWeights * _lossFunction.BregmanGenerator(sums.sumWeightedLabels / sums.sumWeights);
-    }
-
-    template <typename LossFunctionType, typename BoosterType, typename ThresholdFinderType>
-    auto HistogramForestTrainer<LossFunctionType, BoosterType, ThresholdFinderType>::CallThresholdFinder(Range range) -> std::vector<SplitRuleType>
-    {
-        // uniformly choose _candidatesPerInput from the range, without replacement
-        _dataset.RandomPermute(_random, range.firstIndex, range.size, _thresholdFinderSampleSize);
-
-        auto thresholds = _thresholdFinder.GetThresholds(_dataset.GetExampleReferenceIterator(range.firstIndex, _thresholdFinderSampleSize));
-        return thresholds;
-    }
-
-    template <typename LossFunctionType, typename BoosterType, typename ThresholdFinderType>
-    auto HistogramForestTrainer<LossFunctionType, BoosterType, ThresholdFinderType>::EvaluateSplitRule(const SplitRuleType& splitRule, const Range& range) const -> std::tuple<Sums, size_t>
-    {
-        Sums sums0;
-        size_t size0 = 0;
-
-        auto exampleIterator = _dataset.GetExampleIterator(range.firstIndex, range.size);
-        while (exampleIterator.IsValid())
-        {
-            const auto& example = exampleIterator.Get();
-            auto prediction = splitRule.Predict(example.GetDataVector());
-            if (prediction == 0)
-            {
-                sums0.Increment(example.GetMetadata().weak);
-                ++size0;
-            }
-            exampleIterator.Next();
-        }
-
-        return std::make_tuple(sums0, size0);
-    };
-
-    template <typename LossFunctionType, typename BoosterType, typename ThresholdFinderType>
-    std::unique_ptr<ITrainer<predictors::SimpleForestPredictor>> MakeHistogramForestTrainer(const LossFunctionType& lossFunction, const BoosterType& booster, const ThresholdFinderType& thresholdFinder, const HistogramForestTrainerParameters& parameters)
-    {
-        return std::make_unique<HistogramForestTrainer<LossFunctionType, BoosterType, ThresholdFinderType>>(lossFunction, booster, thresholdFinder, parameters);
-    }
-} // namespace trainers
-} // namespace ell
diff --git a/libraries/trainers/tcc/MeanCalculator.tcc b/libraries/trainers/tcc/MeanCalculator.tcc
deleted file mode 100644
index f5468399a..000000000
--- a/libraries/trainers/tcc/MeanCalculator.tcc
+++ /dev/null
@@ -1,54 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     MeanCalculator.tcc (trainers)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <data/include/TransformedDataVector.h>
-
-namespace ell
-{
-namespace trainers
-{
-    template <data::IterationPolicy policy, typename TransformationType>
-    math::RowVector<double> CalculateTransformedMean(const data::AnyDataset& anyDataset, TransformationType transformation)
-    {
-        // get example iterator
-        auto exampleIterator = anyDataset.GetExampleIterator<data::AutoSupervisedExample>();
-
-        math::RowVector<double> result;
-        size_t count = 0;
-        while (exampleIterator.IsValid())
-        {
-            const auto& x = exampleIterator.Get().GetDataVector();
-            if (x.PrefixLength() > result.Size())
-            {
-                result.Resize(x.PrefixLength());
-            }
-
-            result += data::MakeTransformedDataVector<policy>(x, transformation);
-            ++count;
-            exampleIterator.Next();
-        }
-
-        double scale = 1.0 / count;
-        result.Transform([scale](double x) { return scale * x; });
-
-        return result;
-    }
-
-    template <typename TransformationType>
-    math::RowVector<double> CalculateSparseTransformedMean(const data::AnyDataset& anyDataset, TransformationType transformation)
-    {
-        return CalculateTransformedMean<data::IterationPolicy::skipZeros>(anyDataset, transformation);
-    }
-
-    template <typename TransformationType>
-    math::RowVector<double> CalculateDenseTransformedMean(const data::AnyDataset& anyDataset, TransformationType transformation)
-    {
-        return CalculateTransformedMean<data::IterationPolicy::all>(anyDataset, transformation);
-    }
-} // namespace trainers
-} // namespace ell
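The deleted mean calculator accumulates examples into a row vector that grows to the longest prefix seen, then scales by 1/count; shorter examples are implicitly zero-padded by the resize. The same idea over plain `std::vector`, as a self-contained sketch:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // grow the accumulator to the longest row seen, then scale by 1/count
    std::vector<double> CalculateMean(const std::vector<std::vector<double>>& rows)
    {
        std::vector<double> result;
        for (const auto& row : rows)
        {
            if (row.size() > result.size())
            {
                result.resize(row.size(), 0.0); // new entries start at zero
            }
            for (size_t j = 0; j < row.size(); ++j)
            {
                result[j] += row[j];
            }
        }
        double scale = rows.empty() ? 0.0 : 1.0 / rows.size();
        for (auto& x : result)
        {
            x *= scale;
        }
        return result;
    }

    int main()
    {
        auto mean = CalculateMean({ { 1.0, 2.0 }, { 3.0 } }); // -> { 2.0, 1.0 }
        for (auto x : mean) std::cout << x << " ";
        std::cout << "\n";
    }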
-    }
-
-    template <math::MatrixLayout layout>
-    double ProtoNNTrainerUtils::MaxAbsoluteElement(math::ConstMatrixReference<double, layout> A)
-    {
-        double max = A(0, 0);
-        auto m = A.NumRows();
-        auto n = A.NumColumns();
-        for (size_t i = 0; i < m; i++)
-        {
-            for (size_t j = 0; j < n; j++)
-            {
-                max = std::max(max, std::abs(A(i, j)));
-            }
-        }
-
-        return max;
-    }
-
-    void ProtoNNTrainerUtils::HardThresholding(math::MatrixReference<double, math::MatrixLayout::columnMajor> M, double sparsity)
-    {
-        assert(sparsity >= 0.0 && sparsity <= 1.0);
-        if (sparsity >= 0.999)
-            return;
-
-        const double eps = 1e-8;
-
-        std::vector<double> data;
-        data.assign(M.GetDataPointer(), M.GetDataPointer() + (size_t)(M.NumRows() * M.NumColumns()));
-        std::sort(data.begin(), data.end(), [](double i, double j) { return std::abs(i) > std::abs(j); });
-
-        size_t mat_size = M.NumRows() * M.NumColumns();
-
-        double thresh = std::abs(data[(size_t)((sparsity * mat_size) - 1)]);
-        if (thresh <= eps)
-            thresh = eps;
-
-        for (size_t i = 0; i < M.NumColumns(); i++)
-        {
-            M.GetColumn(i).Transform([thresh](double x) { return (std::abs(x) < thresh ? 0.0 : x); });
-        }
-    }
-} // namespace trainers
-} // namespace ell
diff --git a/libraries/trainers/tcc/SDCATrainer.tcc b/libraries/trainers/tcc/SDCATrainer.tcc
deleted file mode 100644
index a71ac8033..000000000
--- a/libraries/trainers/tcc/SDCATrainer.tcc
+++ /dev/null
@@ -1,140 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     SDCATrainer.tcc (trainers)
-//  Authors:  Lin Xiao, Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <utilities/include/Debug.h>
-
-#include <memory>
-
-namespace ell
-{
-namespace trainers
-{
-    template <typename LossFunctionType, typename RegularizerType>
-    SDCATrainer<LossFunctionType, RegularizerType>::SDCATrainer(const LossFunctionType& lossFunction, const RegularizerType& regularizer, const SDCATrainerParameters& parameters) :
-        _lossFunction(lossFunction),
-        _regularizer(regularizer),
-        _parameters(parameters)
-    {
-        _random = utilities::GetRandomEngine(parameters.randomSeedString);
-    }
-
-    template <typename LossFunctionType, typename RegularizerType>
-    void SDCATrainer<LossFunctionType, RegularizerType>::SetDataset(const data::AnyDataset& anyDataset)
-    {
-        DEBUG_THROW(_v.Norm0() != 0, utilities::LogicException(utilities::LogicExceptionErrors::illegalState, "can only call SetDataset before updates"));
-
-        _dataset = data::Dataset<TrainerExampleType>(anyDataset);
-        auto numExamples = _dataset.NumExamples();
-        _inverseScaledRegularization = 1.0 / (numExamples * _parameters.regularization);
-
-        _predictorInfo.primalObjective = 0;
-        _predictorInfo.dualObjective = 0;
-
-        // precompute the norm of each example
-        for (size_t rowIndex = 0; rowIndex < numExamples; ++rowIndex)
-        {
-            auto& example = _dataset[rowIndex];
-            example.GetMetadata().norm2Squared = example.GetDataVector().Norm2Squared();
-
-            auto label = example.GetMetadata().weightLabel.label;
-            _predictorInfo.primalObjective += _lossFunction(0, label) / numExamples;
-        }
-    }
-
-    template <typename LossFunctionType, typename RegularizerType>
-    void SDCATrainer<LossFunctionType, RegularizerType>::Update()
-    {
-        if (_parameters.permute)
-        {
-            _dataset.RandomPermute(_random);
-        }
-
-        // Iterate
-        for (size_t i = 0; i < _dataset.NumExamples(); ++i)
-        {
-            Step(_dataset[i]);
-        }
-
-        // Finish
-        ComputeObjectives();
-    }
-
-    template <typename LossFunctionType, typename RegularizerType>
-    SDCATrainer<LossFunctionType, RegularizerType>::TrainerMetadata::TrainerMetadata(const data::WeightLabel& original) :
-        weightLabel(original)
-    {}
-
-    template <typename LossFunctionType, typename RegularizerType>
-    void SDCATrainer<LossFunctionType, RegularizerType>::Step(TrainerExampleType& example)
-    {
-        const auto& dataVector = example.GetDataVector();
-        ResizeTo(dataVector);
-
-        auto weightLabel = example.GetMetadata().weightLabel;
-        auto norm2Squared = example.GetMetadata().norm2Squared + 1; // add one because of the bias term
-        auto lipschitz = norm2Squared * _inverseScaledRegularization;
-        auto dual = example.GetMetadata().dualVariable;
-
-        if (lipschitz > 0)
-        {
-            auto prediction = _predictor.Predict(dataVector);
-
-            auto newDual = _lossFunction.ConjugateProx(1.0 / lipschitz, dual + prediction / lipschitz, weightLabel.label);
-            auto dualDiff = newDual - dual;
-
-            if (dualDiff != 0)
-            {
-                _v.Transpose() += (-dualDiff * _inverseScaledRegularization) * dataVector;
-                _d += (-dualDiff * _inverseScaledRegularization);
-                _regularizer.ConjugateGradient(_v, _d, _predictor.GetWeights(), _predictor.GetBias());
-                example.GetMetadata().dualVariable = newDual;
-            }
-        }
-    }
-
-    template <typename LossFunctionType, typename RegularizerType>
-    void SDCATrainer<LossFunctionType, RegularizerType>::ComputeObjectives()
-    {
-        double invSize = 1.0 / _dataset.NumExamples();
-
-        _predictorInfo.primalObjective = 0;
-        _predictorInfo.dualObjective = 0;
-
-        for (size_t i = 0; i < _dataset.NumExamples(); ++i)
-        {
-            const auto& example = _dataset.GetExample(i);
-            auto label = example.GetMetadata().weightLabel.label;
-            auto prediction = _predictor.Predict(example.GetDataVector());
-            auto dualVariable = example.GetMetadata().dualVariable;
-
-            _predictorInfo.primalObjective += invSize * _lossFunction(prediction, label);
-            _predictorInfo.dualObjective -= invSize * _lossFunction.Conjugate(dualVariable, label);
-        }
-
-        _predictorInfo.primalObjective += _parameters.regularization * _regularizer(_predictor.GetWeights(), _predictor.GetBias());
-        _predictorInfo.dualObjective -= _parameters.regularization * _regularizer.Conjugate(_v, _d);
-    }
-
-    template <typename LossFunctionType, typename RegularizerType>
-    void SDCATrainer<LossFunctionType, RegularizerType>::ResizeTo(const data::AutoDataVector& x)
-    {
-        auto xSize = x.PrefixLength();
-        if (xSize > _predictor.Size())
-        {
-            _predictor.Resize(xSize);
-            _v.Resize(xSize);
-        }
-    }
-
-    template <typename LossFunctionType, typename RegularizerType>
-    std::unique_ptr<ITrainer<predictors::LinearPredictor<double>>> MakeSDCATrainer(const LossFunctionType& lossFunction, const RegularizerType& regularizer, const SDCATrainerParameters& parameters)
-    {
-        return std::make_unique<SDCATrainer<LossFunctionType, RegularizerType>>(lossFunction, regularizer, parameters);
-    }
-} // namespace trainers
-} // namespace ell
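`ComputeObjectives` above maintains both the primal objective (regularized empirical loss) and the dual objective. By weak duality the primal is never below the dual, so their difference, the duality gap, upper-bounds how far the current predictor is from optimal; that is why the convergence tests earlier stop on `dualityGap <= parameters.desiredDualityGap`. A toy illustration of the stopping rule (the numbers are made up):

    #include <iostream>

    int main()
    {
        double primalObjective = 0.4271;
        double dualObjective = 0.4264;

        // >= 0 by weak duality; bounds the suboptimality of the current predictor
        double dualityGap = primalObjective - dualObjective;

        const double desiredDualityGap = 1.0e-3;
        std::cout << "duality gap: " << dualityGap << "\n";
        std::cout << (dualityGap <= desiredDualityGap ? "converged" : "keep training") << "\n";
    }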
diff --git a/libraries/trainers/tcc/SGDTrainer.tcc b/libraries/trainers/tcc/SGDTrainer.tcc
deleted file mode 100644
index 56900702b..000000000
--- a/libraries/trainers/tcc/SGDTrainer.tcc
+++ /dev/null
@@ -1,301 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     SGDTrainer.tcc (trainers)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <utilities/include/Debug.h>
-
-#include <cassert>
-#include <cmath>
-#include <memory>
-
-#include <utility>
-
-namespace ell
-{
-namespace trainers
-{
-    // the code in this file follows the notation and pseudocode in https://arxiv.org/abs/1612.09147
-
-    //
-    // SGDTrainer
-    //
-
-    template <typename LossFunctionType>
-    SGDTrainer<LossFunctionType>::SGDTrainer(const LossFunctionType& lossFunction, const SGDTrainerParameters& parameters) :
-        SGDTrainerBase(parameters.randomSeedString),
-        _lossFunction(lossFunction),
-        _parameters(parameters)
-    {
-    }
-
-    template <typename LossFunctionType>
-    void SGDTrainer<LossFunctionType>::DoFirstStep(const data::AutoDataVector& x, double y, double weight)
-    {
-        DoNextStep(x, y, weight);
-    }
-
-    template <typename LossFunctionType>
-    void SGDTrainer<LossFunctionType>::DoNextStep(const data::AutoDataVector& x, double y, double weight)
-    {
-        ResizeTo(x);
-        ++_t;
-
-        // Predict
-        double p = _lastPredictor.Predict(x);
-
-        // calculate the loss derivative
-        double g = weight * _lossFunction.GetDerivative(p, y);
-
-        // get abbreviated names
-        auto& lastW = _lastPredictor.GetWeights();
-        double& lastB = _lastPredictor.GetBias();
-
-        // update the (last) predictor
-        double scaleCoefficient = 1.0 - 1.0 / _t;
-        lastW *= scaleCoefficient;
-        lastB *= scaleCoefficient;
-
-        const double lambda = _parameters.regularization;
-        double updateCoefficient = -g / (lambda * _t);
-        lastW.Transpose() += updateCoefficient * x;
-        lastB += updateCoefficient;
-
-        // get abbreviated names
-        auto& averagedW = _averagedPredictor.GetWeights();
-        double& averagedB = _averagedPredictor.GetBias();
-
-        // update the average predictor
-        averagedW *= scaleCoefficient;
-        averagedB *= scaleCoefficient;
-
-        averagedW += 1.0 / _t * lastW;
-        averagedB += lastB / _t;
-    }
-
-    template <typename LossFunctionType>
-    void SGDTrainer<LossFunctionType>::ResizeTo(const data::AutoDataVector& x)
-    {
-        auto xSize = x.PrefixLength();
-        if (xSize > _lastPredictor.Size())
-        {
-            _lastPredictor.Resize(xSize);
-            _averagedPredictor.Resize(xSize);
-        }
-    }
-
-    //
-    // SparseDataSGDTrainer
-    //
-
-    template <typename LossFunctionType>
-    SparseDataSGDTrainer<LossFunctionType>::SparseDataSGDTrainer(const LossFunctionType& lossFunction, const SGDTrainerParameters& parameters) :
-        SGDTrainerBase(parameters.randomSeedString),
-        _lossFunction(lossFunction),
-        _parameters(parameters)
-    {
-    }
-
-    template <typename LossFunctionType>
-    void SparseDataSGDTrainer<LossFunctionType>::DoFirstStep(const data::AutoDataVector& x, double y, double weight)
-    {
-        ResizeTo(x);
-        _t = 1.0;
-        double g = weight * _lossFunction.GetDerivative(0, y);
-        _v.Transpose() += g * x;
-        _a += g;
-        _c = _a;
-        _h = 1.0;
-    }
-
-    template <typename LossFunctionType>
-    void SparseDataSGDTrainer<LossFunctionType>::DoNextStep(const data::AutoDataVector& x, double y, double weight)
-    {
-        ResizeTo(x);
-        ++_t;
-
-        // apply the predictor
-        const double lambda = _parameters.regularization;
-        double d = x * _v;
-        double p = -(d + _a) / (lambda * (_t - 1.0));
-
-        // get the derivative
-        double g = weight * _lossFunction.GetDerivative(p, y);
-
-        // update
-        _v.Transpose() += g * x;
-        _a += g;
-        _u.Transpose() += _h * g * x;
-        _c += _a / _t;
-        _h += 1.0 / _t;
-    }
-
-    template <typename LossFunctionType>
-    auto SparseDataSGDTrainer<LossFunctionType>::GetLastPredictor() const -> const PredictorType&
-    {
-        const double lambda = _parameters.regularization;
-        _lastPredictor.Resize(_v.Size());
-        auto& w = _lastPredictor.GetWeights();
-
-        // define last predictor based on _v, _a, _t
-        w.Reset();
-        w += (-1 / (lambda * _t)) * _v;
-        _lastPredictor.GetBias() = -_a / (lambda * _t);
-        return _lastPredictor;
-    }
-
-    template <typename LossFunctionType>
-    auto SparseDataSGDTrainer<LossFunctionType>::GetAveragedPredictor() const -> const PredictorType&
-    {
-        const double lambda = _parameters.regularization;
-        _averagedPredictor.Resize(_v.Size());
-        auto& w = _averagedPredictor.GetWeights();
-
-        // define averaged predictor based on _v, _h, _u, _t
-        w.Reset();
-        w += -_h / (lambda * _t) * _v;
-        w += 1 / (lambda * _t) * _u;
-
-        _averagedPredictor.GetBias() = -_c / (lambda * _t);
-        return _averagedPredictor;
-    }
-
-    template <typename LossFunctionType>
-    inline void SparseDataSGDTrainer<LossFunctionType>::ResizeTo(const data::AutoDataVector& x)
-    {
-        auto xSize = x.PrefixLength();
-        if (xSize > _v.Size())
-        {
-            _v.Resize(xSize);
-            _u.Resize(xSize);
-        }
-    }
-
-    //
-    // SparseDataCenteredSGDTrainer
-    //
-
-    template <typename LossFunctionType>
-    SparseDataCenteredSGDTrainer<LossFunctionType>::SparseDataCenteredSGDTrainer(const LossFunctionType& lossFunction, math::RowVector<double> center, const SGDTrainerParameters& parameters) :
-        SGDTrainerBase(parameters.randomSeedString),
-        _lossFunction(lossFunction),
-        _parameters(parameters),
-        _center(std::move(center))
-    {
-        _theta = 1 + _center.Norm2Squared();
-    }
-
-    template <typename LossFunctionType>
-    void SparseDataCenteredSGDTrainer<LossFunctionType>::DoFirstStep(const data::AutoDataVector& x, double y, double weight)
-    {
-        ResizeTo(x);
-        _t = 1.0;
-
-        // first, perform the standard SparseDataSGD step
-        double g = weight * _lossFunction.GetDerivative(0, y);
-        _v.Transpose() += g * x;
-        _a += g;
-        _c = _a;
-        _h = 1.0;
-
-        // next, perform the special steps needed for centering
-        double q = x * _center.Transpose();
-        _z = g * q;
-        _r = _a * _theta - _z;
-        _s = _r;
-    }
-
-    template <typename LossFunctionType>
-    void SparseDataCenteredSGDTrainer<LossFunctionType>::DoNextStep(const data::AutoDataVector& x, double y, double weight)
-    {
-        ResizeTo(x);
-        ++_t;
-
-        // apply the predictor
-        const double lambda = _parameters.regularization;
-        double d = x * _v;
-        double q = x * _center.Transpose();
-        double p = -(d + _r - _a * q) / (lambda * (_t - 1.0));
-
-        // get the derivative
-        double g = weight * _lossFunction.GetDerivative(p, y);
-
-        // apply the SparseDataSGD update
-        _v.Transpose() += g * x;
-        _a += g;
-        _u.Transpose() += _h * g * x;
-        _c += _a / _t;
-        _h += 1.0 / _t;
-
-        // next, perform the special steps needed for centering
-        _z += g * q;
-        _r = _a * _theta - _z;
-        _s += _r / _t;
-    }
-
-    template <typename LossFunctionType>
-    auto SparseDataCenteredSGDTrainer<LossFunctionType>::GetLastPredictor() const -> const PredictorType&
-    {
-        const double lambda = _parameters.regularization;
-        _lastPredictor.Resize(_v.Size());
-        auto& w = _lastPredictor.GetWeights();
-        w += (-1 / (lambda * _t)) * _v;
-        _lastPredictor.GetBias() = -_a / (lambda * _t);
-        return _lastPredictor;
-    }
-
-    template <typename LossFunctionType>
-    auto SparseDataCenteredSGDTrainer<LossFunctionType>::GetAveragedPredictor() const -> const PredictorType&
-    {
-        const double lambda = _parameters.regularization;
-        const double coeff = 1.0 / (lambda * _t);
-        _averagedPredictor.Resize(_v.Size());
-        auto& w = _averagedPredictor.GetWeights();
-
-        // define last predictor based on _v, _u, _c
-        w.Reset();
-        w += -_h * coeff * _v;
-        w += coeff * _u;
-        w += _c * coeff * _center.Transpose();
-
-        _averagedPredictor.GetBias() = -_s * coeff;
-        return _averagedPredictor;
-    }
-
-    template <typename LossFunctionType>
-    inline void SparseDataCenteredSGDTrainer<LossFunctionType>::ResizeTo(const data::AutoDataVector& x)
-    {
-        auto xSize = x.PrefixLength();
-        if (xSize > _v.Size())
-        {
-            _v.Resize(xSize);
-            _u.Resize(xSize);
-        }
-    }
-
-    //
-    // Helper functions
-    //
-
-    template <typename LossFunctionType>
-    std::unique_ptr<ITrainer<predictors::LinearPredictor<double>>> MakeSGDTrainer(const LossFunctionType& lossFunction, const SGDTrainerParameters& parameters)
-    {
-        return std::make_unique<SGDTrainer<LossFunctionType>>(lossFunction, parameters);
-    }
-
-    template <typename LossFunctionType>
-    std::unique_ptr<ITrainer<predictors::LinearPredictor<double>>> MakeSparseDataSGDTrainer(const LossFunctionType& lossFunction, const SGDTrainerParameters& parameters)
-    {
-        return std::make_unique<SparseDataSGDTrainer<LossFunctionType>>(lossFunction, parameters);
-    }
-
-    template <typename LossFunctionType>
-    std::unique_ptr<ITrainer<predictors::LinearPredictor<double>>> MakeSparseDataCenteredSGDTrainer(const LossFunctionType& lossFunction, math::RowVector<double> center, const SGDTrainerParameters& parameters)
-    {
-        return std::make_unique<SparseDataCenteredSGDTrainer<LossFunctionType>>(lossFunction, std::move(center), parameters);
-    }
-} // namespace trainers
-} // namespace ell
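`SGDTrainer::DoNextStep` above keeps two models with the same O(d) recurrence: the last iterate is scaled by (1 - 1/t) and nudged by -g/(lambda*t) times the example, and the running average is scaled the same way and then absorbs 1/t of the last iterate. A minimal dense sketch of that update, assuming squared loss and L2 regularization (toy data, not the ELL predictor types):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        const double lambda = 0.01; // regularization
        std::vector<double> x = { 1.0, -2.0, 0.5 }; // a single training example
        double y = 1.0;

        std::vector<double> lastW(x.size(), 0.0), averagedW(x.size(), 0.0);
        for (size_t t = 1; t <= 100; ++t)
        {
            // predict with the last iterate
            double p = 0;
            for (size_t j = 0; j < x.size(); ++j) p += lastW[j] * x[j];
            double g = p - y; // squared-loss derivative

            double scale = 1.0 - 1.0 / t;    // shrink both models
            double step = -g / (lambda * t); // SGD step size
            for (size_t j = 0; j < x.size(); ++j)
            {
                lastW[j] = scale * lastW[j] + step * x[j];
                averagedW[j] = scale * averagedW[j] + lastW[j] / t; // running average
            }
        }
        std::cout << "averaged w[0]: " << averagedW[0] << "\n";
    }

The averaged predictor is what the trainer ultimately reports; averaging the iterates damps the oscillation of plain SGD around the optimum.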
diff --git a/libraries/trainers/tcc/SortingForestTrainer.tcc b/libraries/trainers/tcc/SortingForestTrainer.tcc
deleted file mode 100644
index 2176fd003..000000000
--- a/libraries/trainers/tcc/SortingForestTrainer.tcc
+++ /dev/null
@@ -1,104 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     SortingForestTrainer.tcc (trainers)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace ell
-{
-namespace trainers
-{
-    template <typename LossFunctionType, typename BoosterType>
-    SortingForestTrainer<LossFunctionType, BoosterType>::SortingForestTrainer(const LossFunctionType& lossFunction, const BoosterType& booster, const SortingForestTrainerParameters& parameters) :
-        ForestTrainer<SplitRuleType, EdgePredictorType, BoosterType>(booster, parameters),
-        _lossFunction(lossFunction)
-    {
-    }
-
-    template <typename LossFunctionType, typename BoosterType>
-    auto SortingForestTrainer<LossFunctionType, BoosterType>::GetBestSplitRuleAtNode(SplittableNodeId nodeId, Range range, Sums sums) -> SplitCandidate
-    {
-        auto numFeatures = _dataset.NumFeatures();
-
-        SplitCandidate bestSplitCandidate(nodeId, range, sums);
-
-        for (size_t inputIndex = 0; inputIndex < numFeatures; ++inputIndex)
-        {
-            // sort the relevant rows of data set in ascending order by inputIndex
-            SortNodeDataset(range, inputIndex);
-
-            Sums sums0;
-
-            // consider all thresholds
-            double nextFeatureValue = _dataset[range.firstIndex].GetDataVector()[inputIndex];
-            for (size_t rowIndex = range.firstIndex; rowIndex < range.firstIndex + range.size - 1; ++rowIndex)
-            {
-                // get friendly names
-                double currentFeatureValue = nextFeatureValue;
-                nextFeatureValue = _dataset[rowIndex + 1].GetDataVector()[inputIndex];
-
-                // increment sums
-                sums0.Increment(_dataset[rowIndex].GetMetadata().weak);
-
-                // only split between rows with different feature values
-                if (currentFeatureValue == nextFeatureValue)
-                {
-                    continue;
-                }
-
-                // compute sums1 and gain
-                auto sums1 = sums - sums0;
-                double gain = CalculateGain(sums, sums0, sums1);
-
-                // find gain maximizer
-                if (gain > bestSplitCandidate.gain)
-                {
-                    bestSplitCandidate.gain = gain;
-                    bestSplitCandidate.splitRule = SplitRuleType{ inputIndex, 0.5 * (currentFeatureValue + nextFeatureValue) };
-                    bestSplitCandidate.ranges.SplitChildRange(0, rowIndex - range.firstIndex + 1);
-                    bestSplitCandidate.stats.SetChildSums({ sums0, sums1 });
-                }
-            }
-        }
-        return bestSplitCandidate;
-    }
-
-    template <typename LossFunctionType, typename BoosterType>
-    auto SortingForestTrainer<LossFunctionType, BoosterType>::GetEdgePredictors(const NodeStats& nodeStats) -> std::vector<EdgePredictorType>
-    {
-        double output = nodeStats.GetTotalSums().GetMeanLabel();
-        double output0 = nodeStats.GetChildSums(0).GetMeanLabel() - output;
-        double output1 = nodeStats.GetChildSums(1).GetMeanLabel() - output;
-        return std::vector<EdgePredictorType>{ output0, output1 };
-    }
-
-    template <typename LossFunctionType, typename BoosterType>
-    void SortingForestTrainer<LossFunctionType, BoosterType>::SortNodeDataset(Range range, size_t inputIndex)
-    {
-        _dataset.Sort([inputIndex](const data::Example<DataVectorType, TrainerMetadata>& example) { return example.GetDataVector()[inputIndex]; },
-                      range.firstIndex,
-                      range.size);
-    }
-
-    template <typename LossFunctionType, typename BoosterType>
-    double SortingForestTrainer<LossFunctionType, BoosterType>::CalculateGain(const Sums& sums, const Sums& sums0, const Sums& sums1) const
-    {
-        if (sums0.sumWeights == 0 || sums1.sumWeights == 0)
-        {
-            return 0;
-        }
-
-        return sums0.sumWeights * _lossFunction.BregmanGenerator(sums0.sumWeightedLabels / sums0.sumWeights) +
-               sums1.sumWeights * _lossFunction.BregmanGenerator(sums1.sumWeightedLabels / sums1.sumWeights) -
-               sums.sumWeights * _lossFunction.BregmanGenerator(sums.sumWeightedLabels / sums.sumWeights);
-    }
-
-    template <typename LossFunctionType, typename BoosterType>
-    std::unique_ptr<ITrainer<predictors::SimpleForestPredictor>> MakeSortingForestTrainer(const LossFunctionType& lossFunction, const BoosterType& booster, const SortingForestTrainerParameters& parameters)
-    {
-        return std::make_unique<SortingForestTrainer<LossFunctionType, BoosterType>>(lossFunction, booster, parameters);
-    }
-} // namespace trainers
-} // namespace ell
diff --git a/libraries/trainers/tcc/SweepingTrainer.tcc b/libraries/trainers/tcc/SweepingTrainer.tcc
deleted file mode 100644
index 664e5ad7f..000000000
--- a/libraries/trainers/tcc/SweepingTrainer.tcc
+++ /dev/null
@@ -1,59 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     SweepingTrainer.tcc (trainers)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace ell
-{
-namespace trainers
-{
-    template <typename PredictorType>
-    SweepingTrainer<PredictorType>::SweepingTrainer(std::vector<EvaluatingTrainerType>&& evaluatingTrainers) :
-        _evaluatingTrainers(std::move(evaluatingTrainers))
-    {
-        assert(_evaluatingTrainers.size() > 0);
-    }
-
-    template <typename PredictorType>
-    void SweepingTrainer<PredictorType>::SetDataset(const data::AnyDataset& anyDataset)
-    {
-        _dataset = data::Dataset<data::AutoSupervisedExample>(anyDataset);
-    }
-
-    template <typename PredictorType>
-    void SweepingTrainer<PredictorType>::Update()
-    {
-        for (size_t i = 0; i < _evaluatingTrainers.size(); ++i)
-        {
-            _evaluatingTrainers[i].Update();
-        }
-    }
-
-    template <typename PredictorType>
-    const PredictorType& SweepingTrainer<PredictorType>::GetPredictor() const
-    {
-        double bestGoodness = _evaluatingTrainers[0].GetEvaluator()->GetGoodness();
-        size_t bestIndex = 0;
-        for (size_t i = 1; i < _evaluatingTrainers.size(); ++i)
-        {
-            double goodness = _evaluatingTrainers[i].GetEvaluator()->GetGoodness();
-            if (goodness > bestGoodness)
-            {
-                bestGoodness = goodness;
-                bestIndex = i;
-            }
-        }
-
-        return _evaluatingTrainers[bestIndex].GetPredictor();
-    }
-
-    template <typename PredictorType>
-    std::unique_ptr<ITrainer<PredictorType>> MakeSweepingTrainer(std::vector<EvaluatingTrainer<PredictorType>>&& evaluatingTrainers)
-    {
-        return std::make_unique<SweepingTrainer<PredictorType>>(std::move(evaluatingTrainers));
-    }
-} // namespace trainers
-} // namespace ell
diff --git a/libraries/trainers/tcc/ThresholdFinder.tcc b/libraries/trainers/tcc/ThresholdFinder.tcc
deleted file mode 100644
index 5f6adbffd..000000000
--- a/libraries/trainers/tcc/ThresholdFinder.tcc
+++ /dev/null
@@ -1,71 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//  Project:  Embedded Learning Library (ELL)
-//  File:     ThresholdFinder.tcc (trainers)
-//  Authors:  Ofer Dekel
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <algorithm>
-
-namespace ell
-{
-namespace trainers
-{
-    template <typename ExampleIteratorType>
-    ThresholdFinder::UniqueValuesResult ThresholdFinder::UniqueValues(ExampleIteratorType exampleIterator) const
-    {
-        std::vector<std::vector<ValueWeight>> result;
-        double totalWeight = 0.0;
-
-        // invert and densify result
-        while (exampleIterator.IsValid())
-        {
-            const auto& example = exampleIterator.Get();
-            const auto& denseDataVector = example.GetDataVector();
-            double weight = example.GetMetadata().weak.weight;
-
-            totalWeight += weight;
-
-            if (result.size() < denseDataVector.PrefixLength())
-            {
-                result.resize(denseDataVector.PrefixLength());
-            }
-
-            for (size_t j = 0; j < denseDataVector.PrefixLength(); ++j)
-            {
-                result[j].push_back({ denseDataVector[j], weight });
-            }
-
-            exampleIterator.Next();
-        }
-
-        // sort and unique each feature
-        for (size_t j = 0; j < result.size(); ++j)
-        {
-            auto newSize = SortReduceCopy(result[j].begin(), result[j].end());
-            result[j].resize(newSize);
-        }
-
-        return { result, totalWeight };
-    }
-
-    template <typename ExampleIteratorType>
-    std::vector<predictors::SingleElementThresholdPredictor> trainers::ExhaustiveThresholdFinder::GetThresholds(ExampleIteratorType exampleIterator) const
-    {
-        auto uniqueValuesResult = UniqueValues(exampleIterator);
-        std::vector<predictors::SingleElementThresholdPredictor> thresholdPredictors;
-
-        for (size_t j = 0; j < uniqueValuesResult.weightedValues.size(); ++j)
-        {
-            const auto& featureValues = uniqueValuesResult.weightedValues[j];
-            for (size_t i = 0; i < featureValues.size() - 1; ++i)
-            {
-                thresholdPredictors.push_back({ j, 0.5 * (featureValues[i].value + featureValues[i + 1].value) });
-            }
-        }
-
-        return thresholdPredictors;
-    }
-} // namespace trainers
-} // namespace ell
diff --git a/libraries/utilities/CMakeLists.txt b/libraries/utilities/CMakeLists.txt
index 1564d6342..7e3560a18 100644
--- a/libraries/utilities/CMakeLists.txt
+++ b/libraries/utilities/CMakeLists.txt
@@ -84,37 +84,10 @@ set(include
     include/XmlArchiver.h
 )
 
-set(tcc
-    tcc/AbstractInvoker.tcc
-    tcc/AnyIterator.tcc
-    tcc/Archiver.tcc
-    tcc/CommandLineParser.tcc
-    tcc/CStringParser.tcc
-    tcc/Exception.tcc
-    tcc/Format.tcc
-    tcc/FunctionUtils.tcc
-    tcc/JsonArchiver.tcc
-    tcc/ObjectArchive.tcc
-    tcc/ObjectArchiver.tcc
-    tcc/Optional.tcc
-    tcc/OutputStreamImpostor.tcc
-    tcc/ParallelTransformIterator.tcc
-    tcc/PropertyBag.tcc
-    tcc/RingBuffer.tcc
-    tcc/StlContainerIterator.tcc
-    tcc/StlStridedIterator.tcc
-    tcc/TransformIterator.tcc
-    tcc/TypeFactory.tcc
-    tcc/TypeName.tcc
-    tcc/Variant.tcc
-    tcc/XmlArchiver.tcc
-)
-
 source_group("src" FILES ${src})
 source_group("include" FILES ${include})
-source_group("tcc" FILES ${tcc})
-add_library(${library_name} ${src} ${include} ${tcc})
+add_library(${library_name} ${src} ${include})
 
 target_include_directories(${library_name} PRIVATE include ${ELL_LIBRARIES_DIR})
 target_link_libraries(${library_name} Threads::Threads)
diff --git a/libraries/utilities/include/AbstractInvoker.h b/libraries/utilities/include/AbstractInvoker.h
index 232c18e10..94e5f4585 100644
--- a/libraries/utilities/include/AbstractInvoker.h
+++ b/libraries/utilities/include/AbstractInvoker.h
@@ -82,4 +82,34 @@ namespace utilities
 } // namespace utilities
 } // namespace ell
 
-#include "../tcc/AbstractInvoker.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace utilities
+{
+    template <typename BaseType, typename DerivedType, typename... DerivedTypes>
+    template <typename ReturnType, typename FunctorType>
+    ReturnType AbstractInvoker<BaseType, DerivedType, DerivedTypes...>::Invoke(const FunctorType& functor, const BaseType* basePointer)
+    {
+        const DerivedType* ptr = dynamic_cast<const DerivedType*>(basePointer);
+        if (ptr != nullptr)
+        {
+            return functor(ptr);
+        }
+        else
+        {
+            return AbstractInvoker<BaseType, DerivedTypes...>::template Invoke<ReturnType>(functor, basePointer);
+        }
+    }
+
+    template <typename BaseType>
+    template <typename ReturnType, typename FunctorType>
+    ReturnType AbstractInvoker<BaseType>::Invoke(const FunctorType& /*functor*/, const BaseType* /*basePointer*/)
+    {
+        throw LogicException(LogicExceptionErrors::illegalState, "base type reference could not be matched with a derived type");
+    }
+} // namespace utilities
+} // namespace ell
+
+#pragma endregion implementation
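`AbstractInvoker` implements compile-time double dispatch: it walks a closed list of candidate derived types, `dynamic_cast`s the base pointer against each, and calls a generic functor with the first properly-typed pointer that matches. A self-contained reimplementation sketch of the pattern under different, hypothetical names (`Invoker`, `Shape`, `Circle`, `Square` are not ELL types):

    #include <iostream>
    #include <memory>
    #include <stdexcept>
    #include <type_traits>

    struct Shape { virtual ~Shape() = default; };
    struct Circle : Shape { double radius = 2.0; };
    struct Square : Shape { double side = 3.0; };

    // base case: no candidate types left to try
    template <typename BaseType, typename... DerivedTypes>
    struct Invoker
    {
        template <typename ReturnType, typename FunctorType>
        static ReturnType Invoke(const FunctorType&, const BaseType*)
        {
            throw std::runtime_error("no matching derived type");
        }
    };

    // try DerivedType first, then recurse on the remaining candidates
    template <typename BaseType, typename DerivedType, typename... DerivedTypes>
    struct Invoker<BaseType, DerivedType, DerivedTypes...>
    {
        template <typename ReturnType, typename FunctorType>
        static ReturnType Invoke(const FunctorType& functor, const BaseType* basePointer)
        {
            if (auto ptr = dynamic_cast<const DerivedType*>(basePointer))
            {
                return functor(ptr); // functor sees the concrete type
            }
            return Invoker<BaseType, DerivedTypes...>::template Invoke<ReturnType>(functor, basePointer);
        }
    };

    int main()
    {
        std::unique_ptr<Shape> shape = std::make_unique<Square>();
        auto area = Invoker<Shape, Circle, Square>::Invoke<double>(
            [](const auto* s) -> double {
                if constexpr (std::is_same_v<std::decay_t<decltype(*s)>, Circle>)
                    return 3.14159 * s->radius * s->radius;
                else
                    return s->side * s->side;
            },
            shape.get());
        std::cout << "area: " << area << "\n"; // prints 9
    }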
diff --git a/libraries/utilities/include/AnyIterator.h b/libraries/utilities/include/AnyIterator.h
index d936dcdd2..56576dcf3 100644
--- a/libraries/utilities/include/AnyIterator.h
+++ b/libraries/utilities/include/AnyIterator.h
@@ -78,4 +78,106 @@ namespace utilities
 } // namespace utilities
 } // namespace ell
 
-#include "../tcc/AnyIterator.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace utilities
+{
+    //
+    // wrapper to convert anything that happens to conform to the IIterator interface into an IIterator<ValueType>
+    //
+    template <typename ValueType, typename IteratorType>
+    class IteratorWrapper : public IIterator<ValueType>
+    {
+    public:
+        IteratorWrapper(const IteratorWrapper& other) = default;
+        IteratorWrapper(IteratorWrapper&& other) = default;
+
+        IteratorWrapper(IteratorType&& inputIterator) :
+            _iterator(inputIterator) {}
+
+        virtual bool IsValid() const override { return _iterator.IsValid(); }
+        virtual bool HasSize() const override { return _iterator.HasSize(); }
+        virtual size_t NumItemsLeft() const override { return _iterator.NumItemsLeft(); }
+        virtual void Next() override { _iterator.Next(); }
+        virtual ValueType Get() const override { return _iterator.Get(); }
+
+    private:
+        IteratorType _iterator;
+    };
+
+    //
+    // AnyIterator class implementation
+    //
+    template <typename ValueType>
+    AnyIterator<ValueType>::AnyIterator(std::shared_ptr<IIterator<ValueType>> iterator) :
+        _iterator(iterator)
+    {
+    }
+
+    template <typename ValueType>
+    bool AnyIterator<ValueType>::IsValid() const
+    {
+        if (_iterator == nullptr)
+        {
+            std::string funcName = __func__;
+            throw Exception(funcName + ": invalid iterator");
+        }
+        return _iterator->IsValid();
+    }
+
+    template <typename ValueType>
+    bool AnyIterator<ValueType>::HasSize() const
+    {
+        if (_iterator == nullptr)
+        {
+            std::string funcName = __func__;
+            throw Exception(funcName + ": invalid iterator");
+        }
+        return _iterator->HasSize();
+    }
+
+    template <typename ValueType>
+    size_t AnyIterator<ValueType>::NumItemsLeft() const
+    {
+        if (_iterator == nullptr)
+        {
+            std::string funcName = __func__;
+            throw Exception(funcName + ": invalid iterator");
+        }
+        return _iterator->NumItemsLeft();
+    }
+
+    template <typename ValueType>
+    void AnyIterator<ValueType>::Next()
+    {
+        if (_iterator == nullptr)
+        {
+            std::string funcName = __func__;
+            throw Exception(funcName + ": invalid iterator");
+        }
+        _iterator->Next();
+    }
+
+    template <typename ValueType>
+    ValueType AnyIterator<ValueType>::Get() const
+    {
+        if (_iterator == nullptr)
+        {
+            std::string funcName = __func__;
+            throw Exception(funcName + ": invalid iterator");
+        }
+        return _iterator->Get();
+    }
+
+    template <typename ValueType, typename IteratorType>
+    AnyIterator<ValueType> MakeAnyIterator(IteratorType&& iter)
+    {
+        auto wrapper = std::make_shared<IteratorWrapper<ValueType, IteratorType>>(std::forward<IteratorType>(iter));
+        return AnyIterator<ValueType>(wrapper);
+    }
+} // namespace utilities
+} // namespace ell
+
+#pragma endregion implementation
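(A usage sketch for the type-erasing iterator above; not part of the diff. CountTo is invented for illustration, and the explicit <int> argument assumes MakeAnyIterator's value type is its first template parameter, per the reconstruction above — the original signature may have deduced it instead.)

    #include "AnyIterator.h"
    #include <cstddef>
    #include <iostream>

    // Conforms to the IsValid/HasSize/NumItemsLeft/Next/Get shape that
    // IteratorWrapper expects.
    class CountTo
    {
    public:
        explicit CountTo(size_t end) : _end(end) {}
        bool IsValid() const { return _current < _end; }
        bool HasSize() const { return true; }
        size_t NumItemsLeft() const { return _end - _current; }
        void Next() { ++_current; }
        int Get() const { return static_cast<int>(_current); }

    private:
        size_t _current = 0;
        size_t _end;
    };

    void PrintCounts()
    {
        // Erase the concrete iterator type behind AnyIterator<int>.
        auto iterator = ell::utilities::MakeAnyIterator<int>(CountTo{ 5 });
        while (iterator.IsValid())
        {
            std::cout << iterator.Get() << "\n"; // prints 0 1 2 3 4
            iterator.Next();
        }
    }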
diff --git a/libraries/utilities/include/Archiver.h b/libraries/utilities/include/Archiver.h
index 92d78091a..31f7e6fd7 100644
--- a/libraries/utilities/include/Archiver.h
+++ b/libraries/utilities/include/Archiver.h
@@ -595,4 +595,421 @@ namespace utilities
 } // namespace utilities
 } // namespace ell
 
-#include "../tcc/Archiver.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace utilities
+{
+    //
+    // PropertyArchiver class
+    //
+
+    template <typename ValueType>
+    void Archiver::PropertyArchiver::operator<<(ValueType&& value)
+    {
+        _archiver.Archive(_propertyName.c_str(), value);
+    }
+
+    //
+    // Archiver class
+    //
+
+    template <typename ValueType>
+    void Archiver::Archive(ValueType&& value)
+    {
+        Archive("", std::forward<ValueType>(value));
+    }
+
+    template <typename ValueType>
+    void Archiver::operator<<(ValueType&& value)
+    {
+        Archive(std::forward<ValueType>(value));
+    }
+
+    template <typename ValueType>
+    void Archiver::Archive(const char* name, ValueType&& value)
+    {
+        ArchiveItem(name, value);
+    }
+
+    //
+    // Implementations
+    //
+
+    // unique pointer to non-archivable object
+    template <typename ValueType, IsNotArchivable<ValueType> concept>
+    void Archiver::ArchiveItem(const char* name, const std::unique_ptr<ValueType>& value)
+    {
+        if (!value.get())
+        {
+            // write out a special value indicating null value.
+            ArchiveNull(name);
+        }
+        else
+        {
+            // archive the object being pointed to.
+            ArchiveItem(name, *value.get());
+        }
+    }
+
+    // unique pointer to standard archivable object
+    template <typename ValueType, IsStandardArchivable<ValueType> concept>
+    void Archiver::ArchiveItem(const char* name, const std::unique_ptr<ValueType>& value)
+    {
+        if (!value.get())
+        {
+            // write out a special value indicating null value.
+            ArchiveNull(name);
+        }
+        else
+        {
+            // archive the object being pointed to.
+            ArchiveItem(name, *value.get());
+        }
+    }
+
+    // unique pointer to archived-as-primitive object
+    template <typename ValueType, IsArchivedAsPrimitive<ValueType> concept>
+    void Archiver::ArchiveItem(const char* name, const std::unique_ptr<ValueType>& value)
+    {
+        if (!value.get())
+        {
+            // write out a special value indicating null value.
+            ArchiveNull(name);
+        }
+        else
+        {
+            // archive the object being pointed to.
+            ArchiveItem(name, *value.get());
+        }
+    }
+
+    // Non-vectors
+    template <typename ValueType, IsNotVector<ValueType> concept>
+    void Archiver::ArchiveItem(const char* name, ValueType&& value)
+    {
+        ArchiveValue(name, value);
+    }
+
+    // Pointers
+    template <typename ValueType>
+    void Archiver::ArchiveItem(const char* name, ValueType* value)
+    {
+        Archive(name, *value);
+    }
+
+    // Vector of fundamental types
+    template <typename ValueType, IsFundamental<ValueType> concept>
+    void Archiver::ArchiveItem(const char* name, const std::vector<ValueType>& array)
+    {
+        ArchiveArray(name, array);
+    }
+
+    // Vector of strings
+    inline void Archiver::ArchiveItem(const char* name, const std::vector<std::string>& array)
+    {
+        ArchiveArray(name, array);
+    }
+
+    // Vector of serializable objects
+    template <typename ValueType, IsIArchivable<ValueType> concept>
+    void Archiver::ArchiveItem(const char* name, const std::vector<ValueType>& array)
+    {
+        auto baseTypeName = GetArchivedTypeName<ValueType>();
+        std::vector<const utilities::IArchivable*> tmpArray;
+        for (const auto& item : array)
+        {
+            tmpArray.push_back(&item);
+        }
+        ArchiveArray(name, baseTypeName, tmpArray);
+    }
+
+    // Vector of serializable pointers
+    template <typename ValueType, IsIArchivable<ValueType> concept>
+    void Archiver::ArchiveItem(const char* name, const std::vector<const ValueType*>& array)
+    {
+        auto baseTypeName = GetArchivedTypeName<ValueType>();
+        std::vector<const utilities::IArchivable*> tmpArray;
+        for (const auto& item : array)
+        {
+            tmpArray.push_back(item);
+        }
+        ArchiveArray(name, baseTypeName, tmpArray);
+    }
+
+    //
+    // PropertyUnarchiver class
+    //
+    template <typename ValueType>
+    void Unarchiver::PropertyUnarchiver::operator>>(ValueType&& value)
+    {
+        _unarchiver.Unarchive(_propertyName.c_str(), value);
+    }
+
+    //
+    // OptionalPropertyUnarchiver class
+    //
+    template <typename DefaultValueType>
+    Unarchiver::OptionalPropertyUnarchiver<DefaultValueType>::OptionalPropertyUnarchiver(Unarchiver& archiver, const std::string& name, const DefaultValueType& defaultValue) :
+        _unarchiver(archiver),
+        _propertyName(name),
+        _defaultValue(defaultValue)
+    {
+    }
+
+    template <typename DefaultValueType>
+    template <typename ValueType>
+    void Unarchiver::OptionalPropertyUnarchiver<DefaultValueType>::operator>>(ValueType&& value)
+    {
+        if (_unarchiver.HasNextPropertyName(_propertyName))
+        {
+            _unarchiver.Unarchive(_propertyName.c_str(), value);
+        }
+        else
+        {
+            value = _defaultValue;
+        }
+    }
+
+    // Specialization for the no-default case: a missing property leaves the value untouched.
+    template <>
+    template <typename ValueType>
+    void Unarchiver::OptionalPropertyUnarchiver<void*>::operator>>(ValueType&& value)
+    {
+        if (_unarchiver.HasNextPropertyName(_propertyName))
+        {
+            _unarchiver.Unarchive(_propertyName.c_str(), value);
+        }
+    }
+
+    //
+    // Unarchiver class
+    //
+    template <typename ValueType>
+    void Unarchiver::Unarchive(ValueType&& value)
+    {
+        Unarchive("", value);
+    }
+
+    template <typename ValueType>
+    void Unarchiver::operator>>(ValueType&& value)
+    {
+        Unarchive(std::forward<ValueType>(value));
+    }
+
+    template <typename ValueType>
+    void Unarchiver::Unarchive(const char* name, ValueType&& value)
+    {
+        UnarchiveItem(name, value);
+    }
+
+    // STYLE: inline to keep next to its sibling overload
+    inline Unarchiver::OptionalPropertyUnarchiver<void*> Unarchiver::OptionalProperty(const std::string& name)
+    {
+        return OptionalPropertyUnarchiver<void*>(*this, name, {});
+    }
+
+    template <typename DefaultValueType>
+    Unarchiver::OptionalPropertyUnarchiver<DefaultValueType> Unarchiver::OptionalProperty(const std::string& name, const DefaultValueType& defaultValue)
+    {
+        return OptionalPropertyUnarchiver<DefaultValueType>(*this, name, defaultValue);
+    }
+
+    // types:
+    //   Fundamental
+    //   IArchivable (& ArchivedAsPrimitive)
+    //   Array
+    template <typename ValueType, IsNotVector<ValueType> concept1, IsNotArchivedAsPrimitive<ValueType> concept2>
+    void Unarchiver::UnarchiveItem(const char* name, ValueType&& value)
+    {
+        UnarchiveValue(name, value);
+    }
+
+    template <typename ValueType, IsNotVector<ValueType> concept1, IsArchivedAsPrimitive<ValueType> concept2>
+    void Unarchiver::UnarchiveItem(const char* name, ValueType&& value)
+    {
+        UnarchiveObjectAsPrimitive(name, value);
+    }
+
+    // unique pointer to non-archivable type
+    template <typename ValueType, IsNotArchivable<ValueType> concept>
+    void Unarchiver::UnarchiveItem(const char* name, std::unique_ptr<ValueType>& value)
+    {
+        if (!UnarchiveNull(name))
+        {
+            auto ptr = std::make_unique<ValueType>();
+            UnarchiveValue(name, *ptr);
+            value = std::move(ptr);
+        }
+    }
+
+    // unique pointer to standard archivable object
+    template <typename ValueType, IsStandardArchivable<ValueType> concept>
+    void Unarchiver::UnarchiveItem(const char* name, std::unique_ptr<ValueType>& value)
+    {
+        if (!UnarchiveNull(name))
+        {
+            auto baseTypeName = GetArchivedTypeName<ValueType>();
+            auto objInfo = BeginUnarchiveObject(name, baseTypeName);
+            _objectInfo.push_back(objInfo);
+            auto encodedTypeName = objInfo.type;
+            std::unique_ptr<ValueType> newPtr = GetContext().GetTypeFactory().Construct<ValueType>(encodedTypeName);
+            UnarchiveObject(name, *newPtr);
+            EndUnarchiveObject(name, encodedTypeName);
+            // TODO: assert back of _objectInfo == objInfo
+            _objectInfo.pop_back();
+            value = std::move(newPtr);
+        }
+    }
+
+    // pointer to serializable-as-primitive type
+    template <typename ValueType, IsArchivedAsPrimitive<ValueType> concept>
+    void Unarchiver::UnarchiveItem(const char* name, std::unique_ptr<ValueType>& value)
+    {
+        if (!UnarchiveNull(name))
+        {
+            auto baseTypeName = GetArchivedTypeName<ValueType>();
+            std::unique_ptr<ValueType> newPtr = std::make_unique<ValueType>();
+            UnarchiveObject(name, *newPtr);
+            value = std::move(newPtr);
+        }
+    }
+
+    // Vector of fundamental types
+    template <typename ValueType, IsFundamental<ValueType> concept>
+    void Unarchiver::UnarchiveItem(const char* name, std::vector<ValueType>& arr)
+    {
+        arr.clear();
+        UnarchiveArray(name, arr);
+    }
+
+    // Vector of strings
+    inline void Unarchiver::UnarchiveItem(const char* name, std::vector<std::string>& arr)
+    {
+        arr.clear();
+        UnarchiveArray(name, arr);
+    }
+
+    // Vector of serializable objects
+    template <typename ValueType, IsIArchivable<ValueType> concept>
+    void Unarchiver::UnarchiveItem(const char* name, std::vector<ValueType>& arr)
+    {
+        arr.clear();
+        auto typeName = GetArchivedTypeName<ValueType>();
+        BeginUnarchiveArray(name, typeName);
+        while (true)
+        {
+            auto good = BeginUnarchiveArrayItem(typeName);
+            if (!good)
+            {
+                break;
+            }
+            ValueType value;
+            Unarchive(value);
+            arr.push_back(value);
+            EndUnarchiveArrayItem(typeName);
+        }
+        EndUnarchiveArray(name, typeName);
+    }
+
+    // Vector of unique pointers to serializable objects
+    template <typename ValueType, IsIArchivable<ValueType> concept>
+    void Unarchiver::UnarchiveItem(const char* name, std::vector<std::unique_ptr<ValueType>>& arr)
+    {
+        arr.clear();
+        auto typeName = GetArchivedTypeName<ValueType>();
+        BeginUnarchiveArray(name, typeName);
+        while (true)
+        {
+            auto good = BeginUnarchiveArrayItem(typeName);
+            if (!good)
+            {
+                break;
+            }
+            std::unique_ptr<ValueType> newPtr;
+            Unarchive(newPtr);
+            arr.push_back(std::move(newPtr));
+            EndUnarchiveArrayItem(typeName);
+        }
+        EndUnarchiveArray(name, typeName);
+    }
+
+    // Vector of raw pointers to serializable objects
+    template <typename ValueType, IsIArchivable<ValueType> concept>
+    void Unarchiver::UnarchiveItem(const char* name, std::vector<const ValueType*>& arr)
+    {
+        arr.clear();
+        auto typeName = GetArchivedTypeName<ValueType>();
+        BeginUnarchiveArray(name, typeName);
+        while (true)
+        {
+            auto good = BeginUnarchiveArrayItem(typeName);
+            if (!good)
+            {
+                break;
+            }
+            std::unique_ptr<ValueType> newPtr;
+            Unarchive(newPtr);
+            arr.push_back(newPtr.release());
+            EndUnarchiveArrayItem(typeName);
+        }
+        EndUnarchiveArray(name, typeName);
+    }
+
+    //
+    // Utility classes
+    //
+
+    template <typename ValueType>
+    EnsureMaxPrecision<ValueType>::EnsureMaxPrecision(std::ostream& out) :
+        _flags(out.flags()),
+        _precision(out.precision()),
+        _out(out)
+    {
+        _out.precision(std::numeric_limits<ValueType>::digits10 + 1);
+    }
+
+    template <typename ValueType>
+    EnsureMaxPrecision<ValueType>::~EnsureMaxPrecision()
+    {
+        _out.flags(_flags);
+        _out.precision(_precision);
+    }
+
+    //
+    // Utility functions
+    //
+    namespace ArchiverImpl
+    {
+        template <typename T>
+        static std::string GetTypeName(...)
+        {
+            return TypeName<T>::GetName();
+        }
+
+        template <typename T, IsIntegral<T> concept = true>
+        static std::string GetTypeName(bool)
+        {
+            return "int";
+        }
+
+        template <typename T, IsFloatingPoint<T> concept = true>
+        static std::string GetTypeName(bool)
+        {
+            return "float";
+        }
+    } // namespace ArchiverImpl
+
+    template <typename T>
+    std::string GetArchivedTypeName()
+    {
+        return ArchiverImpl::GetTypeName<T>(true);
+    }
+
+    template <typename T>
+    std::string GetArchivedTypeName(const T& value)
+    {
+        return value.GetRuntimeTypeName();
+    }
+} // namespace utilities
+} // namespace ell
+
+#pragma endregion implementation
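(A round-trip sketch of the Archiver/Unarchiver API above; not part of the diff. It assumes the JsonArchiver/JsonUnarchiver implementations listed in this library's CMakeLists, a default-constructible SerializationContext, and an operator[] that returns the PropertyArchiver/PropertyUnarchiver helpers shown above.)

    #include "Archiver.h"
    #include "JsonArchiver.h"
    #include <sstream>
    #include <vector>

    void RoundTrip()
    {
        std::stringstream stream;
        {
            ell::utilities::JsonArchiver archiver(stream);
            archiver << 42;                                   // unnamed value
            archiver["rate"] << 3.14;                         // named property
            archiver["sizes"] << std::vector<int>{ 1, 2, 3 }; // vector of fundamentals
        }

        ell::utilities::SerializationContext context;
        ell::utilities::JsonUnarchiver unarchiver(stream, context);
        int answer = 0;
        double rate = 0;
        std::vector<int> sizes;
        unarchiver >> answer;
        unarchiver["rate"] >> rate;
        unarchiver["sizes"] >> sizes;

        // A missing property can fall back to a default via OptionalProperty.
        int version = 0;
        unarchiver.OptionalProperty("version", 1) >> version;
    }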
diff --git a/libraries/utilities/include/CStringParser.h b/libraries/utilities/include/CStringParser.h
index 5235ce813..a5911628a 100644
--- a/libraries/utilities/include/CStringParser.h
+++ b/libraries/utilities/include/CStringParser.h
@@ -71,4 +71,329 @@ namespace utilities
 } // namespace utilities
 } // namespace ell
 
-#include "../tcc/CStringParser.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace utilities
+{
+    // wrapper for strtof
+    inline ParseResult cParse(const char* pStr, char*& pEnd, float& value)
+    {
+        if (IsWhitespace(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        value = strtof(pStr, &pEnd);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    // wrapper for std::strtod
+    inline ParseResult cParse(const char* pStr, char*& pEnd, double& value)
+    {
+        if (IsWhitespace(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        value = std::strtod(pStr, &pEnd);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    // wrapper for strtoul
+    inline ParseResult cParse(const char* pStr, char*& pEnd, unsigned int& value)
+    {
+        if (!IsDigit(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        auto x = strtoul(pStr, &pEnd, 0);
+        if (x != static_cast<unsigned int>(x))
+        {
+            return ParseResult::outOfRange;
+        }
+
+        value = static_cast<unsigned int>(x);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    // wrapper for strtoul
+    inline ParseResult cParse(const char* pStr, char*& pEnd, uint64_t& value)
+    {
+        if (!IsDigit(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        auto x = strtoul(pStr, &pEnd, 0);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+        if (x != static_cast<uint64_t>(x))
+        {
+            return ParseResult::outOfRange;
+        }
+
+        value = static_cast<uint64_t>(x);
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    // wrapper for strtol
+    inline ParseResult cParse(const char* pStr, char*& pEnd, int& value)
+    {
+        if (IsWhitespace(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        auto x = strtol(pStr, &pEnd, 0);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+        if (x != static_cast<int>(x))
+        {
+            return ParseResult::outOfRange;
+        }
+
+        value = static_cast<int>(x);
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    // wrapper for strtol
+    inline ParseResult cParse(const char* pStr, char*& pEnd, long& value)
+    {
+        if (IsWhitespace(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        value = strtol(pStr, &pEnd, 0);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    // wrapper for strtoul
+    inline ParseResult cParse(const char* pStr, char*& pEnd, unsigned short& value)
+    {
+        if (!IsDigit(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        auto x = strtoul(pStr, &pEnd, 0);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+        if (x != static_cast<unsigned short>(x))
+        {
+            return ParseResult::outOfRange;
+        }
+        value = static_cast<unsigned short>(x);
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    // wrapper for strtol
+    inline ParseResult cParse(const char* pStr, char*& pEnd, short& value)
+    {
+        if (IsWhitespace(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        long x = strtol(pStr, &pEnd, 0);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+        if (x != static_cast<short>(x))
+        {
+            return ParseResult::outOfRange;
+        }
+
+        value = static_cast<short>(x);
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    // parse a single char from the input string.
+    inline ParseResult cParse(const char* pStr, char*& pEnd, char& value)
+    {
+        value = *pStr;
+        pEnd = const_cast<char*>(++pStr);
+        return ParseResult::success;
+    }
+
+    // parser for std::string, scans until finding a character other than alphanumeric or '_'
+    inline ParseResult cParse(const char* pStr, char*& pEnd, std::string& value)
+    {
+        const char* iter = pStr;
+        while (std::isalnum(*iter) || *iter == '_')
+        {
+            ++iter;
+        }
+        value = std::string(pStr, iter);
+        pEnd = const_cast<char*>(iter);
+
+        return ParseResult::success;
+    }
+
+    // wrapper for strtoul; enabled only when unsigned long is a distinct type from uint64_t
+    template <typename T = unsigned long, std::enable_if_t<!std::is_same<T, uint64_t>::value, int> = 0>
+    inline ParseResult cParse(const char* pStr, char*& pEnd, unsigned long& value)
+    {
+        if (!IsDigit(*pStr))
+        {
+            return ParseResult::badFormat;
+        }
+
+        auto tmp = errno;
+        errno = 0;
+
+        value = strtoul(pStr, &pEnd, 0);
+
+        if (pStr == pEnd)
+        {
+            return ParseResult::badFormat;
+        }
+        if (errno == ERANGE)
+        {
+            return ParseResult::outOfRange;
+        }
+
+        errno = tmp;
+        return ParseResult::success;
+    }
+
+    template <typename ValueType>
+    ParseResult Parse(const char*& pStr, ValueType& value)
+    {
+        // check for eof
+        if (IsEndOfString(*pStr))
+        {
+            return ParseResult::endOfString;
+        }
+
+        // check for "//" comment indicator
+        if (*pStr == '/')
+        {
+            if (*(pStr + 1) == '/')
+            {
+                return ParseResult::beginComment;
+            }
+        }
+
+        // check for "#" comment indicator
+        if (*pStr == '#')
+        {
+            return ParseResult::beginComment;
+        }
+
+        char* pEnd = nullptr;
+        auto parseResult = cParse(pStr, pEnd, value);
+        pStr = pEnd;
+
+        return parseResult;
+    }
+} // namespace utilities
+} // namespace ell
+
+#pragma endregion implementation
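(A usage sketch of the Parse entry point above; not part of the diff. Note that pStr is advanced past each successfully parsed token, and that the numeric parsers reject leading whitespace, so the caller must skip separators itself.)

    #include "CStringParser.h"
    #include <iostream>

    void ParsePair()
    {
        const char* text = "42 3.5";
        int count = 0;
        double scale = 0;

        if (ell::utilities::Parse(text, count) == ell::utilities::ParseResult::success)
        {
            ++text; // skip the separating space before parsing the next token
            ell::utilities::Parse(text, scale);
        }
        std::cout << count << " " << scale << "\n"; // prints "42 3.5"
    }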
diff --git a/libraries/utilities/include/CommandLineParser.h b/libraries/utilities/include/CommandLineParser.h
index f84ddf296..295fc99cb 100644
--- a/libraries/utilities/include/CommandLineParser.h
+++ b/libraries/utilities/include/CommandLineParser.h
@@ -356,4 +356,129 @@ namespace utilities
 } // namespace utilities
 } // namespace ell
 
-#include "../tcc/CommandLineParser.tcc"
+#pragma region implementation
+
+namespace ell
+{
+namespace utilities
+{
+    // format of argv: Main.exe [options]
+    // where options are of the form "-