Skip to content
Permalink
Browse files

add first attempt for a c# pipeline called from python

  • Loading branch information...
sdpython committed Aug 6, 2018
1 parent ef6e867 commit 2c66172430f5a1371d848fcda2e0069881bae939
@@ -38,6 +38,7 @@ def custom_latex_processing(latex):
epkg_dictionary.update({
'C#': 'https://en.wikipedia.org/wiki/C_Sharp_(programming_language)',
'C# DataFrame': 'https://github.com/sdpython/machinelearningext/blob/master/machinelearningext/DataManipulation/DataFrame.cs',
'C# Pipeline': 'https://github.com/sdpython/machinelearningext/blob/master/machinelearningext/ScikitAPI/ScikitPipeline.cs',
'csv': 'https://en.wikipedia.org/wiki/Comma-separated_values',
'DataFrame': 'https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html',
'DataKind': 'https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.Core/Data/DataKind.cs#L13',
@@ -0,0 +1,53 @@
"""
@brief test log(time=3s)
You should indicate a time in seconds. The program ``run_unittests.py``
will sort all test files by increasing time and run them.
"""
import sys
import os
import unittest
import numpy
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas
from pyquickhelper.pycode import ExtTestCase

try:
import src
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..")))
if path not in sys.path:
sys.path.append(path)
import src

from src.csharpyml.binaries import CSPipeline


class TestCsPipeline(ExtTestCase):
    """Unit tests for the Python wrapper around the C# pipeline."""

    def test_src(self):
        """References imported modules so pylint does not flag them as unused."""
        for module in (src, datasets):
            self.assertFalse(module is None)

    def test_iris(self):
        """Builds a pipeline and trains it on the iris dataset."""
        features, target = datasets.load_iris(return_X_y=True)
        features32 = features.astype(numpy.float32)
        target32 = target.astype(numpy.float32)
        X_train, X_test, y_train, y_test = train_test_split(
            features32, target32)
        # The held-out split is not scored here; it is only referenced
        # so that the names are used.
        assert X_test is not None
        assert y_test is not None

        # The four feature columns are named explicitly because the
        # concat transform below refers to them by name.
        train_df = pandas.DataFrame(
            data=X_train, columns=["FA", "FB", "FC", "FD"])
        train_df["Label"] = y_train

        transforms = ["concat{col=Feat:FA,FB,FC,FD}"]
        pipeline = CSPipeline(transforms, "lr")
        pipeline.fit(train_df, feature="Feat", label="Label")


if __name__ == "__main__":
unittest.main()
@@ -51,6 +51,7 @@ def test_style_src(self):
"Unable to import 'CSharPyMLExtension'",
'W0212: Access to a protected member _obj',
'add_reference.py:21: W0703',
"Unable to import 'src.csharpyml.cparts.cmodule'",
])

def test_style_test(self):
@@ -56,9 +56,14 @@
<Reference Include="Microsoft.ML.Sweeper">
<HintPath>..\bin\machinelearning\$(Configuration)\Microsoft.ML.Sweeper.dll</HintPath>
</Reference>
<Reference Include="Microsoft.ML.Ext.DataManipulation">
<Reference Include="Scikit.ML.DataManipulation">
<HintPath>..\bin\machinelearningext\$(Configuration)\Scikit.ML.DataManipulation.dll</HintPath>
</Reference>
<Reference Include="Scikit.ML.PipelineHelper">
<HintPath>..\bin\machinelearningext\$(Configuration)\Scikit.ML.PipelineHelper.dll</HintPath>
</Reference>
<Reference Include="Scikit.ML.ScikitAPI">
<HintPath>..\bin\machinelearningext\$(Configuration)\Scikit.ML.ScikitAPI.dll</HintPath>
</Reference>
</ItemGroup>

</Project>
@@ -9,7 +9,7 @@
namespace CSharPyMLExtension
{
/// <summary>
/// Easier function to use from Python.
/// Easier functions to use from Python.
/// </summary>
public static class DataFrameHelper
{
@@ -1,8 +1,6 @@
// See the LICENSE file in the project root for more information.

using System;
using System.IO;
using System.Text;
using Microsoft.ML.Runtime.Tools;


@@ -19,7 +17,7 @@ public MamlException(string msg) : base(msg)
}

/// <summary>
/// Helpers to run script through maml.
/// Helpers to run scripts through maml.
/// </summary>
public static class MamlHelper
{
@@ -0,0 +1,30 @@
// See the LICENSE file in the project root for more information.

using System.IO;
using Microsoft.ML.Runtime;
using Scikit.ML.ScikitAPI;


namespace CSharPyMLExtension
{
    /// <summary>
    /// Static factory helpers that make <see cref="ScikitPipeline"/>
    /// easier to create from Python.
    /// </summary>
    public static class PipelineHelper
    {
        /// <summary>
        /// Builds a <see cref="ScikitPipeline"/> by forwarding a file name
        /// and an optional host to the matching constructor.
        /// </summary>
        /// <param name="filename">file name given to the pipeline constructor</param>
        /// <param name="host">host environment, can be null</param>
        /// <returns>the new pipeline</returns>
        public static ScikitPipeline CreateScikitPipeline(string filename, IHostEnvironment host = null)
            => new ScikitPipeline(filename, host);

        /// <summary>
        /// Builds a <see cref="ScikitPipeline"/> by forwarding a stream
        /// and an optional host to the matching constructor.
        /// </summary>
        /// <param name="st">stream given to the pipeline constructor</param>
        /// <param name="host">host environment, can be null</param>
        /// <returns>the new pipeline</returns>
        public static ScikitPipeline CreateScikitPipeline(Stream st, IHostEnvironment host = null)
            => new ScikitPipeline(st, host);

        /// <summary>
        /// Builds a <see cref="ScikitPipeline"/> from a list of transforms
        /// and a predictor, both forwarded to the matching constructor.
        /// </summary>
        /// <param name="transforms">transforms, can be null</param>
        /// <param name="predictor">predictor, can be null</param>
        /// <param name="host">host environment, can be null</param>
        /// <returns>the new pipeline</returns>
        public static ScikitPipeline CreateScikitPipeline(string[] transforms = null, string predictor = null, IHostEnvironment host = null)
            => new ScikitPipeline(transforms, predictor, host);
    }
}
@@ -8,7 +8,7 @@
namespace CSharPyMLExtension
{
/// <summary>
/// Capture standard output and error.
/// Captures standard output and error.
/// </summary>
public class StdCapture: IDisposable
{
@@ -333,6 +333,7 @@ def copy_assemblies(lib=None, version="Release"):
folders = ['cscode/machinelearningext/machinelearningext/DataManipulation/bin/%s' % version,
'cscode/machinelearningext/machinelearningext/PipelineHelper/bin/%s' % version,
'cscode/machinelearningext/machinelearningext/PipelineTransforms/bin/%s' % version,
'cscode/machinelearningext/machinelearningext/ScikitAPI/bin/%s' % version,
]
dests = ['cscode/bin/machinelearningext/%s' % version,
'src/csharpyml/binaries/%s' % version,
@@ -4,4 +4,5 @@
"""
from .add_reference import AddReference, add_csharpml_extension
from .cs_dataframe import CSDataFrame
from .cs_pipeline import CSPipeline
from .maml_helper import maml
@@ -1,6 +1,6 @@
"""
@file
@brief Makes C# Dataframe available in Python.
@brief Makes :epkg:`C# DataFrame` available in :epkg:`Python`.
"""
from collections import OrderedDict
import numpy
@@ -0,0 +1,52 @@
"""
@file
@brief Makes :epkg:`C# Pipeline` available in :epkg:`Python`.
"""
import pandas
from .add_reference import add_csharpml_extension
from .cs_dataframe import CSDataFrame


class CSPipeline:
    """
    Wraps :epkg:`C# Pipeline`.
    The underlying :epkg:`C#` object is stored in attribute *_obj*.
    """

    @staticmethod
    def get_cs_class():
        """
        Returns the :epkg:`C#` class used to interact
        with :epkg:`C# Pipeline`.
        """
        # Must run before the import below; presumably it makes the
        # C# assemblies visible to Python -- confirm in add_reference.
        add_csharpml_extension()
        from CSharPyMLExtension import PipelineHelper
        return PipelineHelper

    def __init__(self, transforms=None, predictor=None):
        """
        Creates a pipeline :epkg:`C# Pipeline`.

        @param      transforms      list of transforms (can be None)
        @param      predictor       predictor (can be None)
        """
        PipelineHelper = CSPipeline.get_cs_class()
        self._obj = PipelineHelper.CreateScikitPipeline(transforms, predictor)

    def fit(self, data, feature=None, label=None, group_id=None, weight=None):
        """
        Fits a pipeline.

        @param      data        dataframe (*pandas* or *C#*),
                                a *pandas* dataframe is converted with
                                ``CSDataFrame.read_df``
        @param      feature     if a predictor is specified,
                                specify which column to use as features
        @param      label       if a supervised predictor is specified,
                                specify which column to use as label
        @param      group_id    if a ranker is specified,
                                specify which column to use as group id
        @param      weight      if a predictor is specified,
                                specify which column to use as weight
        @return                 self
        """
        if isinstance(data, pandas.DataFrame):
            data = CSDataFrame.read_df(data)
        # Positional order passed to Train is (weight, group_id), which
        # differs from the order of this method's keyword parameters.
        self._obj.Train(data._obj, feature, label, weight, group_id)
        # The docstring promises self so that calls can be chained.
        return self

0 comments on commit 2c66172

Please sign in to comment.
You can’t perform that action at this time.