Skip to content
This repository has been archived by the owner on Jul 23, 2022. It is now read-only.

Commit

Permalink
Fixes #8, #9, refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
sdpython committed Oct 13, 2018
1 parent cd68d6e commit ea856ef
Show file tree
Hide file tree
Showing 32 changed files with 993 additions and 301 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,6 @@ src/csharpyml/binaries/*.lib
src/csharpyml/binaries/*.xml
src/csharpyml/binaries/*.json
*.err
_doc/sphinxdoc/source/components
cscode/packages
_doc/examples/model.zip
6 changes: 0 additions & 6 deletions _doc/examples/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,3 @@
Gallery of examples
===================


First section
-------------

One example of a gallery.

60 changes: 60 additions & 0 deletions _doc/examples/plot_cspipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Trains boosted trees (FastTree) on Iris dataset
===============================================
The following example shows how to create and train
a pipeline using :ref:`l-fasttree-(boosted-trees)-classification`.
"""
import sys
import os
import unittest
import numpy
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas
from csharpyml.binaries import CSPipeline

##############################
# Load the Iris data, cast features and labels to float32, split them
# into a train part and a test part, and store each part in a dataframe
# whose label column is called *Label*.

feature_names = ["FA", "FB", "FC", "FD"]
iris_X, iris_y = datasets.load_iris(return_X_y=True)
features32 = iris_X.astype(numpy.float32)
labels32 = iris_y.astype(numpy.float32)
X_train, X_test, y_train, y_test = train_test_split(features32, labels32)

df_train = pandas.DataFrame(
    data=X_train, columns=feature_names).assign(Label=y_train)
df_test = pandas.DataFrame(
    data=X_test, columns=feature_names).assign(Label=y_test)

##############################
# Build the pipeline: one transform concatenating the four feature
# columns into *Feat*, followed by the predictor.
transforms = ["concat{col=Feat:FA,FB,FC,FD}"]
pipe = CSPipeline(transforms, "oova{p=ft}", verbose=2)

#############################
# Fit the pipeline on the training dataframe.
pipe.fit(df_train, feature="Feat", label="Label")

###############################################
# Display the text available in *pipe.StdOut* after training.

print(pipe.StdOut)

#################################
# Compute predictions for the test dataframe and show the first rows.

predictions = pipe.predict(df_test)
print(predictions.head())

###########################
# Write the trained pipeline to disk.

model_path = "model.zip"
pipe.save(model_path)

#############################
# Read the pipeline back and predict again with the restored copy.

restored = CSPipeline.load(model_path)
restored_predictions = restored.predict(df_test)
print(restored_predictions.head())
271 changes: 141 additions & 130 deletions _doc/notebooks/csharp_for_ml_in_notebook.ipynb

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions _doc/sphinxdoc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

blog_root = "http://www.xavierdupre.fr/app/csharpyml/helpsphinx/"

# Append the extension module csharpyml.sphinxext.sphinx_mlext to the
# list of Sphinx extensions.
extensions.extend(['csharpyml.sphinxext.sphinx_mlext'])

# 'css_files' is the list returned by get_default_stylesheet() followed by
# two local stylesheets located under _static.
html_context = {
'css_files': get_default_stylesheet() + ['_static/my-styles.css', '_static/gallery.css'],
}
Expand All @@ -38,10 +40,20 @@ def custom_latex_processing(latex):
# Add entries to epkg_dictionary: each key is a display name and each
# value is the URL associated with it.
# NOTE(review): 'C# LogWriter' and 'C# ScikitPipeline' point to
# xadupre/machinelearningext whereas 'C# DataFrame' and 'C# Pipeline' point
# to sdpython/machinelearningext; 'C# Pipeline' and 'C# ScikitPipeline' also
# target the same file (ScikitPipeline.cs) -- confirm this is intended.
epkg_dictionary.update({
'C#': 'https://en.wikipedia.org/wiki/C_Sharp_(programming_language)',
'C# DataFrame': 'https://github.com/sdpython/machinelearningext/blob/master/machinelearningext/DataManipulation/DataFrame.cs',
'C# IDataView': 'https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.Core/Data/IDataView.cs',
'C# LogWriter': 'https://github.com/xadupre/machinelearningext/blob/master/machinelearningext/PipelineHelper/DelegateEnvironment.cs',
'C# Pipeline': 'https://github.com/sdpython/machinelearningext/blob/master/machinelearningext/ScikitAPI/ScikitPipeline.cs',
'C# ScikitPipeline': 'https://github.com/xadupre/machinelearningext/blob/master/machinelearningext/ScikitAPI/ScikitPipeline.cs',
'csv': 'https://en.wikipedia.org/wiki/Comma-separated_values',
'DataFrame': 'https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html',
'DataKind': 'https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.Core/Data/DataKind.cs#L13',
'DBSCAN': 'https://en.wikipedia.org/wiki/DBSCAN',
'ML.net': 'https://github.com/dotnet/machinelearning',
'OPTICS': 'https://en.wikipedia.org/wiki/OPTICS_algorithm',
'Windows': 'https://www.microsoft.com/',
})


from recommonmark.parser import CommonMarkParser
# Associate the '.md' suffix with recommonmark's CommonMarkParser and
# declare both '.rst' and '.md' as source suffixes.
source_parsers = {'.md': CommonMarkParser}
source_suffix = ['.rst', '.md']
31 changes: 7 additions & 24 deletions _doc/sphinxdoc/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,16 @@ Documentation
:maxdepth: 1

api/index
mlnetdocs/index
components/index
i_ex
i_faq
i_nb
all_notebooks
blog/blogindex
indexmenu
HISTORY
license

It can easily compile and wrap a :epkg:`C#` function
into :epkg:`Python`:
Expand All @@ -81,15 +88,6 @@ The list of available trainers can be obtained with:
This function also exists as a magic command
:ref:`%%maml <cmagic-maml>`.

Galleries
---------

.. toctree::
:maxdepth: 2

all_notebooks
blog/blogindex

Installation
------------

Expand All @@ -103,21 +101,6 @@ Follow the instructions described in
Follow the instructions described in
`config.yml <https://github.com/sdpython/csharpyml/blob/master/.circleci/config.yml>`_.

Navigation
----------

.. toctree::
:maxdepth: 1

indexmenu
HISTORY
license

.. toctree::
:hidden:

blog/index_blog

+----------------------+---------------------+---------------------+--------------------+------------------------+------------------------------------------------+
| :ref:`l-modules` | :ref:`l-functions` | :ref:`l-classes` | :ref:`l-methods` | :ref:`l-staticmethods` | :ref:`l-properties` |
+----------------------+---------------------+---------------------+--------------------+------------------------+------------------------------------------------+
Expand Down
5 changes: 5 additions & 0 deletions _doc/sphinxdoc/source/indexmenu.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@ Through documentation
filechanges
README
all_indexes

.. toctree::
:hidden:

blog/index_blog
Binary file added _doc/sphinxdoc/source/nbcov-2018-10-13.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _doc/sphinxdoc/source/nbcov.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
73 changes: 43 additions & 30 deletions _unittests/ut_binaries/test_cspipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import sys
import os
import unittest
from io import StringIO
from contextlib import redirect_stdout, redirect_stderr
import numpy
from sklearn import datasets
from sklearn.model_selection import train_test_split
Expand Down Expand Up @@ -38,34 +40,45 @@ def test_src(self):
self.assertFalse(datasets is None)

def test_predictor(self):
X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
X.astype(numpy.float32), y.astype(numpy.float32))
df_train = pandas.DataFrame(data=X_train, columns=[
"FA", "FB", "FC", "FD"])
df_train["Label"] = y_train
pipe = CSPipeline(["concat{col=Feat:FA,FB,FC,FD}"],
"ova{p=lr}", stdout="C#", verbose=0)
pipe.fit(df_train, feature="Feat", label="Label")
df_test = pandas.DataFrame(data=X_test, columns=[
"FA", "FB", "FC", "FD"])
self.assertIsInstance(df_test, pandas.DataFrame)
df_test["Label"] = y_test
pred = pipe.predict(df_test)
head = pred.head()
exp = ['FA', 'FB', 'FC', 'FD', 'Label', 'Feat.0', 'Feat.1', 'Feat.2',
'Feat.3', 'PredictedLabel', 'Score.0', 'Score.1', 'Score.2']
self.assertEqual(list(head.columns), exp)
self.assertEqual(pred.shape, (38, 13))
acc = (pred.Label + 1 - pred.PredictedLabel).abs().sum()
self.assertLesser(acc, 10)
# Save
temp = get_temp_folder(__file__, "temp_predictor")
outfile = os.path.join(temp, "iris.zip")
pipe.save(outfile)
pipe2 = CSPipeline.load(outfile)
pred2 = pipe2.predict(df_test)
self.assertEqual(pred, pred2)
fout = StringIO()
ferr = StringIO()
with redirect_stdout(fout):
with redirect_stderr(ferr):
X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
X.astype(numpy.float32), y.astype(numpy.float32))
df_train = pandas.DataFrame(data=X_train, columns=[
"FA", "FB", "FC", "FD"])
df_train["Label"] = y_train
pipe = CSPipeline(["concat{col=Feat:FA,FB,FC,FD}"],
"oova{p=ap}", verbose=2)
pipe.fit(df_train, feature="Feat", label="Label")
stdout = pipe.StdOut
self.assertIn('Training learner 1', stdout)
self.assertEqual('', pipe.StdErr)
df_test = pandas.DataFrame(data=X_test, columns=[
"FA", "FB", "FC", "FD"])
self.assertIsInstance(df_test, pandas.DataFrame)
df_test["Label"] = y_test
pred = pipe.predict(df_test)
head = pred.head()
exp = ['FA', 'FB', 'FC', 'FD', 'Label', 'Feat.0', 'Feat.1', 'Feat.2',
'Feat.3', 'PredictedLabel', 'Score.0', 'Score.1', 'Score.2']
self.assertEqual(list(head.columns), exp)
self.assertEqual(pred.shape, (38, 13))
acc = (pred.Label + 1 - pred.PredictedLabel).abs().sum()
self.assertLesser(acc, 10)
# Save
temp = get_temp_folder(__file__, "temp_predictor")
outfile = os.path.join(temp, "iris.zip")
pipe.save(outfile)
pipe2 = CSPipeline.load(outfile)
pred2 = pipe2.predict(df_test)
self.assertEqual(pred, pred2)
out = fout.getvalue()
err = ferr.getvalue()
self.assertEqual(out, '')
self.assertEqual(err, '')

def test_transform(self):
X, y = datasets.load_iris(return_X_y=True)
Expand All @@ -75,7 +88,7 @@ def test_transform(self):
"FA", "FB", "FC", "FD"])
df_train["Label"] = y_train
pipe = CSPipeline(
["concat{col=Feat:FA,FB,FC,FD}", "poly{col=Feat}"], stdout="python", verbose=0)
["concat{col=Feat:FA,FB,FC,FD}", "poly{col=Feat}"], verbose=0)
pipe.fit(df_train)
df_test = pandas.DataFrame(data=X_test, columns=[
"FA", "FB", "FC", "FD"])
Expand All @@ -100,7 +113,7 @@ def test_transform_array(self):
X_train, X_test, __, _ = train_test_split(
X.astype(numpy.float32), y.astype(numpy.float32))
pipe = CSPipeline(
["concat{col=Feat:X0,X1,X2,X3}", "poly{col=Feat}"], stdout="python", verbose=0)
["concat{col=Feat:X0,X1,X2,X3}", "poly{col=Feat}"], verbose=0)
pipe.fit(X_train)
pred = pipe.transform(X_test)
head = pred.head()
Expand Down
2 changes: 1 addition & 1 deletion _unittests/ut_binaries/test_maml.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_maml(self):

out, _ = maml(script)
self.assertExists(model)
self.assertIn("LBFGS Optimizer", out)
self.assertIn("'Normalize' finished", out)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion _unittests/ut_binaries/test_maml_nb.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_maml_nb(self):
data=__DATA__
loader=text{col=Label:U4[0-2]:0 col=Slength:R4:1 col=Swidth:R4:2 col=Plength:R4:3 col=Pwidth:R4:4 sep=, header=+}
xf=Concat{col=Features:Slength,Swidth}
tr=ova{p=lr}
tr=oova{p=lr}
out=__MODEL__
""".strip("\n ").replace('__MODEL__', model).replace('__DATA__', dest)

Expand Down
35 changes: 21 additions & 14 deletions build.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,43 @@ if not exist %ppythonpath% set ppythonpath="c:\Python36_x64"
set PATH=%PATH%;%ppythonpath%
set PYTHONPATH=%~dp0..\pyquickhelper\src

set DOTNET_CLI_TELEMETRY_OPTOUT=1
set DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1
set DOTNET_MULTILEVEL_LOOKUP=0

set LOCALMLEXT=%~dp0..\machinelearningext
if exist %LOCALMLEXT% goto copybinaries:

cd cscode\machinelearning
if "%1" == "ml" goto buildrelease:
if exist bin\x64.Release goto mldeb:
@echo [build.cmd] build machinelearning release
:buildrelease:
cmd /C build.cmd -release
if "%1" == "ml" goto buildml:
if exist bin goto mlend:
@echo [build.cmd] build machinelearning debug and release
:buildml:
cmd /C build.cmd
if %errorlevel% neq 0 exit /b %errorlevel%
:mldeb:
if "%1" == "ml" goto builddebug:
if exist bin\x64.Debug goto mlrel:
:builddebug:
@echo [build.cmd] build machinelearning debug
cmd /C build.cmd -debug
cmd /C build.cmd -release
if %errorlevel% neq 0 exit /b %errorlevel%
:mlrel:
:mlend:
cd ..\..

:copybinaries:
if "%1" == "ml" goto copydebug:
if exist cscode\machinelearning\bin\x64.Debug goto copymlrel:
if exist cscode\machinelearning\bin\AnyCPU.Debug\Microsoft.ML.Api goto copymlrel:
:copydebug:
@echo [build.cmd] copy debug binaries for machinelearning
python -u setup.py copybinml debug
if %errorlevel% neq 0 exit /b %errorlevel%
:copymlrel:
if "%1" == "ml" goto copyrelease:
if exist cscode\machinelearning\bin\x64.Release goto copybin:
if exist cscode\machinelearning\bin\AnyCPU.Release\Microsoft.ML.Api goto copybin:
:copyrelease:
@echo [build.cmd] copy release binaries for machinelearning
python -u setup.py copybinml release
if %errorlevel% neq 0 exit /b %errorlevel%

:copybin:
if exist %LOCALMLEXT% goto copybinariesext:

@echo [build.cmd] build machinelearningext
cd cscode\machinelearningext\machinelearningext
dotnet build -c Release machinelearningext.sln
Expand All @@ -49,6 +54,8 @@ dotnet build -c debug machinelearningext.sln
if %errorlevel% neq 0 exit /b %errorlevel%
cd ..\..\..

:copybinariesext:

@echo [build.cmd] copy binaries for machinelearningext
python -u setup.py copybinmlext debug
if %errorlevel% neq 0 exit /b %errorlevel%
Expand Down
Loading

0 comments on commit ea856ef

Please sign in to comment.