# Convert LightGBM Model to Java Code and SQL Code
In some real-life cases we want to deploy an ML model quickly, but the production environment is complicated. This tool converts a trained LightGBM model into native Java code or native SQL code. Both languages are very common in legacy systems, so the generated model code can be deployed without any ML framework. This tool can help you bring your machine learning models into production quickly. I hope it makes your work easier ^^!

In [1]:
import lightgbm2Java, lightgbm2SQL
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split

# Train a Classifier on the Titanic Data

In [2]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
# Load the Kaggle Titanic CSVs from GitHub. train_df carries the 'Survived'
# label; test_df is kept around so a later cell can load it into SQLite.
train_df = pd.read_csv('https://raw.githubusercontent.com/agconti/kaggle-titanic/master/data/train.csv')
test_df = pd.read_csv('https://raw.githubusercontent.com/agconti/kaggle-titanic/master/data/test.csv')

# Split the label off and hold out 20% of the rows for validation.
y = train_df.pop('Survived')
cols = ['Pclass', 'Age', 'Fare', 'SibSp', 'Parch', 'Sex', 'Embarked']
categorical_cols = ['Sex', 'Embarked']
X_train, X_test, y_train, y_test = train_test_split(train_df[cols],
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# Work on explicit copies so the column assignments below never touch (or
# warn about touching) a slice of train_df.
X_train = X_train.copy()
X_test = X_test.copy()
for c in categorical_cols:
    X_train[c] = X_train[c].astype('category')
    # Reuse the training dtype so both frames share one category -> code
    # mapping; encoding each frame independently could assign different
    # codes to the same label.
    X_test[c] = X_test[c].astype(X_train[c].dtype)

# Create an LGBM dataset for training
train_data = lgb.Dataset(data=X_train[cols],
                         label=y_train)

# Create an LGBM dataset for validation, binned consistently with the
# training data.
test_data = lgb.Dataset(data=X_test[cols],
                        label=y_test,
                        reference=train_data)

lgb_params = {
    'boosting': 'dart',          # dart (drop out trees) often performs better
    'application': 'binary',     # Binary classification
    'learning_rate': 0.05,       # Learning rate, controls size of a gradient descent step
    'min_data_in_leaf': 20,      # Minimum number of rows per leaf
    'feature_fraction': 0.7,     # Proportion of features in each boost, controls overfitting
    'metric': 'binary_logloss',  # Binary log loss as the evaluation metric
    'drop_rate': 0.15,           # Fraction of previous trees dropped per dart iteration
    'num_leaves': 2,
    'max_depth': 2,
}

# The number of trees is set in exactly one place. Previously
# 'n_estimators': 5 in the params silently overrode num_boost_round=500,
# so five trees is what was actually trained.
#
# Evaluation recording/logging go through callbacks (log_evaluation needs
# LightGBM >= 3.3); the old evals_result= / verbose_eval= /
# early_stopping_rounds= keyword arguments no longer exist in LightGBM 4.
# Early stopping is intentionally omitted: it is not available in dart mode.
evaluation_results = {}
model = lgb.train(
    params=lgb_params,
    train_set=train_data,
    num_boost_round=5,
    valid_sets=[train_data, test_data],
    valid_names=['Train', 'Test'],
    callbacks=[
        lgb.record_evaluation(evaluation_results),
        lgb.log_evaluation(period=20),
    ],
)

[LightGBM] [Info] Number of positive: 268, number of negative: 444
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 197
[LightGBM] [Info] Number of data points in the train set: 712, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.376404 -> initscore=-0.504838
[LightGBM] [Info] Start training from score -0.504838




# Convert2Java 

In [3]:
# Dump the trained booster to its dict representation, then hand that dump
# to the Java converter and show the generated source.
booster_dump = model.dump_model()
lg2java = lightgbm2Java.Lightgbm2Java()
code = lg2java.doProcess(booster_dump)
print(code)


import java.lang.Math;        

double predictTree0(double Pclass, double Age, double Fare, double SibSp, double Parch, String Sex, String Embarked) { if ( Sex.equals("male") ||  2==1 ) { return -0.5453338404251737; } else { return -0.42764676122463174; } }

double predictTree1(double Pclass, double Age, double Fare, double SibSp, double Parch, String Sex, String Embarked) { if ( Sex.equals("male") ||  2==1 ) { return -0.03888386665860092; } else { return 0.07201202475199128; } }

double predictTree2(double Pclass, double Age, double Fare, double SibSp, double Parch, String Sex, String Embarked) { if ( Sex.equals("male") ||  2==1 ) { return -0.037347301567330744; } else { return 0.06743999325617799; } }

double predictTree3(double Pclass, double Age, double Fare, double SibSp, double Parch, String Sex, String Embarked) { if ( Sex.equals("male") ||  2==1 ) { return -0.0358799961185798; } else { return 0.06336361224151639; } }

double predictTree4(double Pclass, double Age, double Fare,

# Convert2SQL

In [4]:
# Dump the trained booster to its dict representation, then hand that dump
# to the SQL converter and show the generated statements.
booster_dump = model.dump_model()
lg2sql = lightgbm2SQL.Lightgbm2SQL()
code = lg2sql.doProcess(booster_dump)
print(code)

create view score_view as 

select id,`Pclass`,`Age`,`Fare`,`SibSp`,`Parch`,`Sex`,`Embarked`,
(case when ( `Sex`='male' or  2=1 )  
then   -0.5453338404251737  
else  -0.42764676122463174  end) +

(case when ( `Sex`='male' or  2=1 )  
then   -0.03888386665860092  
else  0.07201202475199128  end) +

(case when ( `Sex`='male' or  2=1 )  
then   -0.037347301567330744  
else  0.06743999325617799  end) +

(case when ( `Sex`='male' or  2=1 )  
then   -0.0358799961185798  
else  0.06336361224151639  end) +

(case when ( Pclass <= 2.5000000000000004 )  
then   0.03503614160722492  
else  -0.02806927705364834  end) 

as score 
from raw_data
;

create view result_view as 
select `Pclass`,`Age`,`Fare`,`SibSp`,`Parch`,`Sex`,`Embarked`,
cast( 1/(1+EXP(-1*score)) as double) from score_view
;
        


# Test SQL

In [9]:
import sqlite3
from math import exp

# Throwaway in-memory database used to run the generated SQL.
con = sqlite3.connect(':memory:')
cur = con.cursor()


def sqlite_exp(x):
    """SQL-callable exp(): return e**x, or NULL (None) when x is NULL."""
    return None if x is None else exp(x)


# The scoring query calls EXP(); register it explicitly because not every
# SQLite build ships the math functions.
con.create_function("exp", 1, sqlite_exp)

# Create raw_data with one untyped column per DataFrame column. Identifiers
# are double-quoted (embedded quotes doubled) so names containing spaces or
# SQL keywords cannot break the DDL.
column_defs = ",".join(
    '"{}"'.format(str(name).replace('"', '""')) for name in test_df.columns
)
cur.execute("CREATE TABLE raw_data ({});".format(column_defs))

# Bulk-insert every row through bound parameters, then commit once the data
# is actually in place.
qmark = ",".join("?" * test_df.shape[1])
cur.executemany("INSERT INTO raw_data VALUES ({})".format(qmark), test_df.to_numpy())
con.commit()

# Peek at the first two rows to confirm the load.
cur.execute('select * from raw_data limit 2').fetchall()

[(892, 3, 'Kelly, Mr. James', 'male', 34.5, 0, 0, '330911', 7.8292, None, 'Q'),
 (893,
  3,
  'Wilkes, Mrs. James (Ellen Needs)',
  'female',
  47.0,
  1,
  0,
  '363272',
  7.0,
  None,
  'S')]

In [10]:
# The statement below is the score_view definition printed by the SQL
# converter, with one adjustment: raw_data has no `id` column, so
# PassengerId is selected and aliased to id.
score_view_sql = """

create view score_view as 

select PassengerId as id,`Pclass`,`Age`,`Fare`,`SibSp`,`Parch`,`Sex`,`Embarked`,
(case when ( `Sex`='male' or  2=1 )  
then   -0.5453338404251737  
else  -0.42764676122463174  end) +

(case when ( `Sex`='male' or  2=1 )  
then   -0.03888386665860092  
else  0.07201202475199128  end) +

(case when ( `Sex`='male' or  2=1 )  
then   -0.037347301567330744  
else  0.06743999325617799  end) +

(case when ( `Sex`='male' or  2=1 )  
then   -0.0358799961185798  
else  0.06336361224151639  end) +

(case when ( Pclass <= 2.5000000000000004 )  
then   0.03503614160722492  
else  -0.02806927705364834  end) 

as score 
from raw_data
;



"""
cur.execute(score_view_sql)
con.commit()


In [11]:
# Turn each row's summed tree score into a probability with the logistic
# function 1 / (1 + exp(-score)) and fetch every scored row.
probability_sql = """
select `Pclass`,`Age`,`Fare`,`SibSp`,`Parch`,`Sex`,`Embarked`,
cast( 1/(1+EXP(-1*score)) as double) from score_view
;

"""
cur.execute(probability_sql).fetchall()

[(3, 34.5, 7.8292, 0, 0, 'male', 'Q', 0.3350316853651071),
 (3, 47.0, 7.0, 1, 0, 'female', 'S', 0.43710973906239176),
 (2, 62.0, 9.6875, 0, 0, 'male', 'Q', 0.3492337915466473),
 (3, 27.0, 8.6625, 0, 0, 'male', 'S', 0.3350316853651071),
 (3, 22.0, 12.2875, 1, 1, 'female', 'S', 0.43710973906239176),
 (3, 14.0, 9.225, 0, 0, 'male', 'S', 0.3350316853651071),
 (3, 30.0, 7.6292, 0, 0, 'female', 'Q', 0.43710973906239176),
 (2, 26.0, 29.0, 1, 1, 'male', 'S', 0.3492337915466473),
 (3, 18.0, 7.2292, 0, 0, 'female', 'C', 0.43710973906239176),
 (3, 21.0, 24.15, 2, 0, 'male', 'S', 0.3350316853651071),
 (3, None, 7.8958, 0, 0, 'male', 'S', 0.3350316853651071),
 (1, 46.0, 26.0, 0, 0, 'male', 'S', 0.3492337915466473),
 (1, 23.0, 82.2667, 1, 0, 'female', 'S', 0.45269317522158126),
 (2, 63.0, 26.0, 1, 0, 'male', 'S', 0.3492337915466473),
 (1, 47.0, 61.175, 1, 0, 'female', 'S', 0.45269317522158126),
 (2, 24.0, 27.7208, 1, 0, 'female', 'C', 0.45269317522158126),
 (2, 35.0, 12.35, 0, 0, 'male', 'Q', 0.3492