# Convert LightGBM Model to Java Code and SQL Code
In some real-life cases, we want to deploy an ML model quickly, but the production environment is very complicated. This tool converts a LightGBM model into native Java code or native SQL code, two languages that are very popular in legacy systems, so the generated model code can be deployed without any ML framework. It can help you move your machine learning models into production quickly. We hope this tool makes you happy and cozy ^^!

In [1]:
import lightgbm2Java, lightgbm2SQL
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split

# Train a classifier on the Titanic data

In [2]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split

# Load the Kaggle Titanic train/test CSVs directly from GitHub.
train_df = pd.read_csv('https://raw.githubusercontent.com/agconti/kaggle-titanic/master/data/train.csv')
test_df = pd.read_csv('https://raw.githubusercontent.com/agconti/kaggle-titanic/master/data/test.csv')

# Split the target off the feature frame, then hold out 20% for validation.
y = train_df.pop('Survived')
cols = ['Pclass', 'Age', 'Fare', 'SibSp', 'Parch', 'Sex', 'Embarked']
X_train, X_test, y_train, y_test = train_test_split(train_df[cols],
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# Take explicit copies so the column assignments below act on our own frames
# (assigning into the split results directly can raise SettingWithCopyWarning).
X_train = X_train.copy()
X_test = X_test.copy()

for c in ['Sex', 'Embarked']:
    X_train[c] = X_train[c].astype('category')
    # Reuse the training dtype so both frames share one category -> code
    # mapping; values unseen in training become NaN instead of silently
    # getting a different code.
    X_test[c] = X_test[c].astype(X_train[c].dtype)


# Create an LGBM dataset for training
train_data = lgb.Dataset(data=X_train[cols],
                         label=y_train)

# Create an LGBM dataset from the test split; `reference` makes it reuse the
# training set's feature binning, as LightGBM expects for validation data.
test_data = lgb.Dataset(data=X_test[cols],
                        label=y_test,
                        reference=train_data)

# The number of trees is passed as `num_boost_round` below. The original put
# 'n_estimators': 5 here, which is an alias that overrides `num_boost_round`,
# so the model always had 5 trees regardless of the 500 passed to lgb.train.
lgb_params = {
    'boosting': 'dart',          # dart (drop out trees) often performs better
    'application': 'binary',     # Binary classification
    'learning_rate': 0.05,       # Learning rate, controls size of a gradient descent step
    'min_data_in_leaf': 20,      # Data set is quite small so reduce this a bit
    'feature_fraction': 0.7,     # Proportion of features in each boost, controls overfitting
    'metric': 'binary_logloss',  # Binary log loss as the evaluation metric
    'drop_rate': 0.15,
    'num_leaves': 4,
    'max_depth': 3,
}

# Filled in by the record_evaluation callback with per-iteration metrics for
# the 'Train' and 'Test' sets.
evaluation_results = {}

# Callbacks replace the evals_result= / verbose_eval= keyword arguments, which
# were removed from lgb.train in LightGBM 4.0 (log_evaluation needs >= 3.3).
# No early-stopping callback is added: LightGBM disables early stopping in
# dart mode, so the original early_stopping_rounds=100 had no effect.
model = lgb.train(params=lgb_params,
                  train_set=train_data,
                  num_boost_round=5,
                  valid_sets=[train_data, test_data],
                  valid_names=['Train', 'Test'],
                  callbacks=[
                      lgb.record_evaluation(evaluation_results),
                      lgb.log_evaluation(period=20),
                  ])

[LightGBM] [Info] Number of positive: 268, number of negative: 444
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 197
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.376404 -> initscore=-0.504838
[LightGBM] [Info] Start training from score -0.504838




# Convert2Java 

In [3]:
# Build the Java converter and feed it the trained booster's dump
# (model.dump_model()); the returned text is printed below as Java source
# with one predictTreeN method per tree.
lg2java = lightgbm2Java.Lightgbm2Java()
code = lg2java.doProcess(model.dump_model())
print(code)


import java.lang.Math;        

double predictTree0(double Pclass, double Age, double Fare, double SibSp, double Parch, String Sex, String Embarked) { if ( Sex.equals("male") ||  2==1 ) { if ( Age <= 6.500000000000001 ) { return -0.43977958328510247; } else { return -0.5505522531377166; } } else { if ( Fare <= 47.00000000000001 ) { return -0.44341053776981176; } else { return -0.38743770800794064; } } }

double predictTree1(double Pclass, double Age, double Fare, double SibSp, double Parch, String Sex, String Embarked) { if ( Sex.equals("male") ||  2==1 ) { if ( Age <= 6.500000000000001 ) { return 0.060855060145106304; } else { return -0.043957340673887485; } } else { if ( SibSp <= 1.5000000000000002 ) { return 0.07994095823701186; } else { return 0.015206689695068991; } } }

double predictTree2(double Pclass, double Age, double Fare, double SibSp, double Parch, String Sex, String Embarked) { if ( Sex.equals("male") ||  2==1 ) { if ( Age <= 6.500000000000001 ) { return 0.0570875242782

# Convert2SQL

In [4]:
# Build the SQL converter and feed it the same booster dump
# (model.dump_model()); the returned text is printed below as a
# `create view` statement that sums one CASE expression per tree.
# Note: this rebinds `code`, replacing the Java text from the previous cell.
lg2sql = lightgbm2SQL.Lightgbm2SQL()
code = lg2sql.doProcess(model.dump_model())
print(code)

create view score_view as 
(
select id,`Pclass`,`Age`,`Fare`,`SibSp`,`Parch`,`Sex`,`Embarked`,
(case when ( `Sex`='male' or  2=1 )  
then  case when ( Age <= 6.500000000000001 )  
then   -0.43977958328510247  
else  -0.5505522531377166  end  
else case when ( Fare <= 47.00000000000001 )  
then   -0.44341053776981176  
else  -0.38743770800794064  end  end) +

(case when ( `Sex`='male' or  2=1 )  
then  case when ( Age <= 6.500000000000001 )  
then   0.060855060145106304  
else  -0.043957340673887485  end  
else case when ( SibSp <= 1.5000000000000002 )  
then   0.07994095823701186  
else  0.015206689695068991  end  end) +

(case when ( `Sex`='male' or  2=1 )  
then  case when ( Age <= 6.500000000000001 )  
then   0.05708752427825118  
else case when ( Fare <= 26.268750000000004 )  
then   -0.05305258348576311  
else  -0.011755823383138858  end  end  
else  0.06745206741796382  end) +

(case when ( `Sex`='male' or  2=1 )  
then  case when ( Age <= 6.500000000000001 )  
then   0.053684509