# ML2CPP

## Preparing the dataset

In [1]:
from sklearn import datasets
import numpy as np
import pandas as pd

def populate_table(tablename, feature_names):
    iris = datasets.load_iris()
    X = iris.data  
    N = X.shape[0]
    y = iris.target.reshape(N,1)
    k = np.arange(N).reshape(N, 1)
    k_X_y = np.concatenate((k, X, y) , axis=1)
    lTable=pd.DataFrame(k_X_y)
    # print(lTable.head())
    lTable.columns = ['idx'] + feature_names + ['TGT'];
    lTable['TGT'] = lTable['TGT'].apply(int)
    lTable['idx'] = lTable['idx'].apply(int)
    lTable.to_csv(tablename , float_format='%.14g')



In [2]:
metadata = {"primary_key" : "KEY",
            "features" : ['sepal_length_cm', 'sepal_width_cm', 'petal_length_cm', 'petal_width_cm'],
            "targets" : ["TGT"],
            "table" : "iris"}

In [3]:
populate_table("/tmp/iris.csv" , metadata["features"])


In [4]:
df = pd.read_csv("/tmp/iris.csv")
df.sample(12, random_state=1960)

Unnamed: 0.1,Unnamed: 0,idx,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,TGT
114,114,114,5.8,2.8,5.1,2.4,2
74,74,74,6.4,2.9,4.3,1.3,1
9,9,9,4.9,3.1,1.5,0.1,0
88,88,88,5.6,3.0,4.1,1.3,1
25,25,25,5.0,3.0,1.6,0.2,0
5,5,5,5.4,3.9,1.7,0.4,0
48,48,48,5.3,3.7,1.5,0.2,0
117,117,117,7.7,3.8,6.7,2.2,2
83,83,83,6.0,2.7,5.1,1.6,1
105,105,105,7.6,3.0,6.6,2.1,2


## Training a Model

In [5]:
import torch
from torch import nn
import torch.nn.functional as F

from skorch import NeuralNetClassifier

torch.manual_seed(1234)

torch.set_default_tensor_type('torch.DoubleTensor')

num_units = 15
# Example of using Sequential
model = nn.Sequential(
    nn.Linear(4, num_units),
    nn.Dropout(),
    nn.ReLU(),
    nn.Linear(num_units , num_units * 2),
    nn.Dropout(),
    nn.Hardshrink(),
    nn.Linear(num_units * 2, 3),
    nn.Softmax())

clf = NeuralNetClassifier(
    model,
    max_epochs=10,
    lr=0.1,
)

clf.fit(df[metadata['features']].values, df[metadata['targets']].values.ravel())

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.2246[0m       [32m0.1667[0m        [35m1.0035[0m  0.0164
      2        1.2813       [32m0.6333[0m        [35m0.9924[0m  0.0055
      3        [36m1.0901[0m       [32m0.6667[0m        [35m0.9621[0m  0.0057
      4        1.1053       0.6000        [35m0.9409[0m  0.0058
      5        [36m0.9949[0m       0.6667        0.9440  0.0054
      6        [36m0.9680[0m       0.6000        [35m0.9318[0m  0.0055
      7        1.0221       0.6667        [35m0.9193[0m  0.0057
      8        1.0283       0.6667        [35m0.9037[0m  0.0053
      9        [36m0.9534[0m       0.6667        [35m0.8861[0m  0.0055
     10        0.9599       0.4667        [35m0.8718[0m  0.0058


  input = module(input)


<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=Sequential(
    (0): Linear(in_features=4, out_features=15, bias=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): ReLU()
    (3): Linear(in_features=15, out_features=30, bias=True)
    (4): Dropout(p=0.5, inplace=False)
    (5): Hardshrink(0.5)
    (6): Linear(in_features=30, out_features=3, bias=True)
    (7): Softmax(dim=None)
  ),
)

## Deploying the Model

In [6]:

def generate_cpp_for_model(model):
    import pickle, json, requests, base64
    b64_data = base64.b64encode(pickle.dumps(model)).decode('utf-8')
    # send the model th the web service
    json_data={"Name":"model_cpp_sample", 
               "PickleData":b64_data , 
               "SQLDialect":"CPP",
               "FeatureNames" : metadata['features']}
    r = requests.post("https://sklearn2sql.herokuapp.com/model", json=json_data)
    content = r.json()
    lCPP = content["model"]["SQLGenrationResult"][0]["SQL"]
    # print(lCPP);
    return lCPP


lCPPCode = generate_cpp_for_model(clf);


In [7]:
print(lCPPCode)

namespace  {

	std::vector<std::any> get_classes(){
		std::vector<std::any> lClasses = { 0, 1, 2 };

		return lClasses;
	}

	namespace layer_0 {
	
		std::vector<std::string> get_input_names(){
			std::vector<std::string> lFeatures = { "Feature_0", "Feature_1", "Feature_2", "Feature_3" };
	
			return lFeatures;
		}
	
		std::vector<std::string> get_output_names(){
			std::vector<std::string> lOutputs = { "output_0", "output_1", "output_2", "output_3", "output_4", "output_5", "output_6", "output_7", "output_8", "output_9", "output_10", "output_11", "output_12", "output_13", "output_14" };
	
			return lOutputs;
		}
	
		tTable compute_features(std::any Feature_0, std::any Feature_1, std::any Feature_2, std::any Feature_3) {
	
			tTable lTable;
	
			lTable["output_0"] = { -0.30603992256711465 + -0.2681658279115361 * Feature_0 + -0.4212461386497293 * Feature_1 + -0.03359383959203077 * Feature_2 + -0.08562051231388146 * Feature_3 };
			lTable["output_1"] = { 0.32941788454291143 + -0.1095505425

In [8]:
    def write_text_to_file(iCPPCode, oCPPFile):          
        with open(oCPPFile, "w") as text_file:
            text_file.write(iCPPCode)

    def add_cpp_main_function(iCPPCode, iCSVFile):
        lCPPCode = "#include \"Generic.i\"\n\n"
        lCPPCode = lCPPCode + iCPPCode
        lCPPCode = lCPPCode + "\tint main() {\n"
        lCPPCode = lCPPCode + "\t\tscore_csv_file(\"" + iCSVFile +"\");\n"
        lCPPCode = lCPPCode + "\treturn 0;\n}\n"
        return lCPPCode

    def compile_cpp_code_as_executable(iName):
        import subprocess
        lCommand = ["g++", "-Wall", "-Wno-unused-function", "-std=c++17" , "-g" ,  "-o", iName + ".exe",  iName + ".cpp"]
        print("EXECUTING" , "'" + " ".join(lCommand) + "'")
        result = subprocess.check_output(lCommand)
        # print(result)

    def execute_cpp_model(iName, iCSVFile):
        import subprocess
        result2 = subprocess.check_output([iName + ".exe",  iCSVFile])
        result2 = result2.decode()
        print(result2[:1000])
        print(result2[-1000:])
        return result2
        
    def execute_cpp_code(iCPPCode, iCSVFile):
        lName = "/tmp/sklearn2sql_cpp_" + str(id(clf));
        lCPPCode = add_cpp_main_function(iCPPCode, iCSVFile)
        write_text_to_file(lCPPCode, lName + ".cpp")
        compile_cpp_code_as_executable(lName)
        result = execute_cpp_model(lName, iCSVFile)
        write_text_to_file(str(result), lName + ".out")
        return lName + ".out"


In [9]:
populate_table("/tmp/iris2.csv" , ["Feature_0", "Feature_1", "Feature_2", "Feature_3"])
lCPPOutput = execute_cpp_code(lCPPCode , "/tmp/iris2.csv")
cpp_output = pd.read_csv(lCPPOutput)

EXECUTING 'g++ -Wall -Wno-unused-function -std=c++17 -g -o /tmp/sklearn2sql_cpp_140025616790960.exe /tmp/sklearn2sql_cpp_140025616790960.cpp'
idx,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision,DecisionProba
0,,,,0.45019637268157,0.30069463258808,0.24910899473035,-0.79807140766864,-1.20166003887067,-1.38986474845242,0,0.45019637268157
1,,,,0.43615671867620,0.30582560886671,0.25801767245709,-0.82975365371348,-1.18474024515206,-1.35472719851069,0,0.43615671867620
2,,,,0.45033926969533,0.29348284744739,0.25617788285728,-0.79775404762594,-1.22593608282759,-1.36188322089122,0,0.45033926969533
3,,,,0.43990095721080,0.29669170360601,0.26340733918320,-0.82120567465621,-1.21506171430750,-1.33405362679918,0,0.43990095721080
4,,,,0.45549589055204,0.28307489364809,0.26142921579986,-0.78636858416246,-1.26204377448433,-1.34159171739680,0,0.45549589055204
5,,,,0.44887987580348,0.29297142827788,0.25814869591864,-0.80099996422035,-1.22768018912988,-1.3542195193

In [10]:
cpp_output.sample(12, random_state=1960)

Unnamed: 0,idx,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision,DecisionProba
114,114,,,,0.178223,0.396729,0.425048,-1.724719,-0.924501,-0.855554,2,0.425048
74,74,,,,0.224922,0.387287,0.387791,-1.492002,-0.948589,-0.947289,2,0.387791
9,9,,,,0.43468,0.306157,0.259163,-0.833145,-1.183657,-1.350299,0,0.43468
88,88,,,,0.21761,0.385556,0.396834,-1.525051,-0.953069,-0.924237,2,0.396834
25,25,,,,0.428909,0.309034,0.262057,-0.846511,-1.174303,-1.339193,0,0.428909
5,5,,,,0.44888,0.292971,0.258149,-0.801,-1.22768,-1.35422,0,0.44888
48,48,,,,0.450293,0.292024,0.257683,-0.797857,-1.23092,-1.356024,0,0.450293
117,117,,,,0.103386,0.486306,0.410308,-2.26929,-0.720917,-0.890846,1,0.486306
83,83,,,,0.176137,0.418436,0.405427,-1.736494,-0.87123,-0.902815,1,0.418436
105,105,,,,0.10155,0.479929,0.418521,-2.287208,-0.734117,-0.871028,1,0.479929


In [11]:
skl_outputs = pd.DataFrame()
X = df[metadata['features']].values
skl_output_key = pd.DataFrame(list(range(X.shape[0])), columns=['idx']);

skl_output_score = pd.DataFrame(columns=['Score_0', 'Score_1', 'Score_2']);
skl_output_proba = pd.DataFrame(clf.predict_proba(X), columns=['Proba_0', 'Proba_1', 'Proba_2'])
skl_output_log_proba = pd.DataFrame(columns=['LogProba_0', 'LogProba_1', 'LogProba_2'])
skl_output_decision = pd.DataFrame(clf.predict(X), columns=['Decision'])
skl_output = pd.concat([skl_output_key, skl_output_score, skl_output_proba, skl_output_log_proba, skl_output_decision] , axis=1)
skl_output.sample(12, random_state=1960)

  input = module(input)


Unnamed: 0,idx,Score_0,Score_1,Score_2,Proba_0,Proba_1,Proba_2,LogProba_0,LogProba_1,LogProba_2,Decision
114,114,,,,0.178223,0.396729,0.425048,,,,2
74,74,,,,0.224922,0.387287,0.387791,,,,2
9,9,,,,0.43468,0.306157,0.259163,,,,0
88,88,,,,0.21761,0.385556,0.396834,,,,2
25,25,,,,0.428909,0.309034,0.262057,,,,0
5,5,,,,0.44888,0.292971,0.258149,,,,0
48,48,,,,0.450293,0.292024,0.257683,,,,0
117,117,,,,0.103386,0.486306,0.410308,,,,1
83,83,,,,0.176137,0.418436,0.405427,,,,1
105,105,,,,0.10155,0.479929,0.418521,,,,1


In [12]:
cpp_skl_join = skl_output.join(cpp_output , how='left', on='idx', lsuffix='_skl', rsuffix='_cpp')

In [13]:
cpp_skl_join.sample(12, random_state=1960)

Unnamed: 0,idx_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_1_cpp,Score_2_cpp,Proba_0_cpp,Proba_1_cpp,Proba_2_cpp,LogProba_0_cpp,LogProba_1_cpp,LogProba_2_cpp,Decision_cpp,DecisionProba
114,114,,,,0.178223,0.396729,0.425048,,,,...,,,0.178223,0.396729,0.425048,-1.724719,-0.924501,-0.855554,2,0.425048
74,74,,,,0.224922,0.387287,0.387791,,,,...,,,0.224922,0.387287,0.387791,-1.492002,-0.948589,-0.947289,2,0.387791
9,9,,,,0.43468,0.306157,0.259163,,,,...,,,0.43468,0.306157,0.259163,-0.833145,-1.183657,-1.350299,0,0.43468
88,88,,,,0.21761,0.385556,0.396834,,,,...,,,0.21761,0.385556,0.396834,-1.525051,-0.953069,-0.924237,2,0.396834
25,25,,,,0.428909,0.309034,0.262057,,,,...,,,0.428909,0.309034,0.262057,-0.846511,-1.174303,-1.339193,0,0.428909
5,5,,,,0.44888,0.292971,0.258149,,,,...,,,0.44888,0.292971,0.258149,-0.801,-1.22768,-1.35422,0,0.44888
48,48,,,,0.450293,0.292024,0.257683,,,,...,,,0.450293,0.292024,0.257683,-0.797857,-1.23092,-1.356024,0,0.450293
117,117,,,,0.103386,0.486306,0.410308,,,,...,,,0.103386,0.486306,0.410308,-2.26929,-0.720917,-0.890846,1,0.486306
83,83,,,,0.176137,0.418436,0.405427,,,,...,,,0.176137,0.418436,0.405427,-1.736494,-0.87123,-0.902815,1,0.418436
105,105,,,,0.10155,0.479929,0.418521,,,,...,,,0.10155,0.479929,0.418521,-2.287208,-0.734117,-0.871028,1,0.479929


In [14]:
condition = (cpp_skl_join.Decision_cpp != cpp_skl_join.Decision_skl)
cpp_skl_join[condition]


Unnamed: 0,idx_skl,Score_0_skl,Score_1_skl,Score_2_skl,Proba_0_skl,Proba_1_skl,Proba_2_skl,LogProba_0_skl,LogProba_1_skl,LogProba_2_skl,...,Score_1_cpp,Score_2_cpp,Proba_0_cpp,Proba_1_cpp,Proba_2_cpp,LogProba_0_cpp,LogProba_1_cpp,LogProba_2_cpp,Decision_cpp,DecisionProba
