In [1]:
library(caret, quiet = TRUE)
library(base64enc)
library(httr, quiet = TRUE)

library(mlbench)


Attaching package: ‘httr’

The following object is masked from ‘package:caret’:

    progress



# Build a Model

In [2]:
## https://www.machinelearningplus.com/machine-learning/caret-package/
## https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_splines

# Load the Boston housing data set.
data(BostonHousing)

# Fix the RNG state before any modelling happens.
set.seed(1960)

# Store `chas` as a plain numeric column.
# NOTE(review): if `chas` is a factor, as.numeric() yields its level codes
# (1/2) rather than 0/1 -- the Feature_3 mean of ~1.069 in the generated SQL
# further down suggests that is what happens. Confirm this is intended.
BostonHousing$chas <- as.numeric(BostonHousing$chas)

# Predictor-only view: every column except the 14th (the `medv` target).
dataset <- BostonHousing[, -14]

#' Fit the caret "earth" model used throughout this notebook.
#'
#' Trains on the global `BostonHousing` data frame with `medv` as the
#' response and every other column as a predictor. caret is asked to apply
#' the "pca" pre-processing step, and resampling is switched off
#' (`method = "none"`), so the single row of the tuning grid
#' (degree = 2, nprune = 20) is used as-is.
#'
#' @return The object returned by `train()`.
create_model <- function() {
    ctrl <- trainControl(method = "none")
    grid <- data.frame(degree = 2, nprune = 20)

    model <- train(
        medv ~ .,
        data = BostonHousing,
        method = "earth",
        preProcess = c("pca"),
        trControl = ctrl,
        tuneGrid = grid
    )

    model
}


In [3]:
# Fit the model once; the cells below reuse this object.
model <- create_model()
# cat(model$feature_names)
# print(model)

Loading required package: earth
Loading required package: plotmo
Loading required package: plotrix
Loading required package: TeachingDemos


In [4]:
# In-sample predictions of the fitted model on the predictor columns.
pred_labels <- predict(model, BostonHousing[, -14], type = "raw")

# Actual target (column 14) next to the prediction and the signed error.
# NOTE(review): the summary below labels the columns "Estimator.y" and
# "Error.y", so predict() presumably returns a one-column matrix named "y".
df <- data.frame(medv = BostonHousing[, 14])
df$Estimator <- pred_labels
df$Error <- df$Estimator - df$medv

# Mean absolute percentage error, as a fraction of the actual value.
MAPE <- mean(abs(df$Error / df$medv))

summary(df)
MAPE

      medv          Estimator.y           Error.y       
 Min.   : 5.00   Min.   : 3.41514   Min.   :-28.648786  
 1st Qu.:17.02   1st Qu.:16.87419   1st Qu.: -1.464352  
 Median :21.20   Median :21.24412   Median :  0.438401  
 Mean   :22.53   Mean   :22.53281   Mean   :  0.000000  
 3rd Qu.:25.00   3rd Qu.:26.05895   3rd Qu.:  2.100269  
 Max.   :50.00   Max.   :53.93316   Max.   : 10.053553  

# SQL Code Generation

In [5]:

#' Ask the SQL-generation web service to translate a fitted model into SQL.
#'
#' The model is serialized with `serialize()`, base64-encoded and POSTed as
#' JSON together with the target dialect ("postgresql") and the mode
#' ("caret"). The SQL text is read from the first entry of
#' `model$SQLGenrationResult` in the parsed response (the key is spelled that
#' way by the service and must not be "corrected" here).
#'
#' @param mod The fitted model object to translate.
#' @param ws_url Endpoint of the service. Defaults to the local instance that
#'   this notebook used; the hosted alternative is
#'   "https://sklearn2sql.herokuapp.com/model".
#' @return The generated SQL as returned by the service.
#' @details Stops with an error if the HTTP request fails or if the response
#'   contains no SQL.
test_ws_sql_gen <- function(mod, ws_url = "http://localhost:1888/model") {
    model_serialized <- serialize(mod, NULL)
    b64_data <- base64encode(model_serialized)

    payload <- list(
        Name = "xgboost_test_model",
        SerializedModel = b64_data,
        SQLDialect = "postgresql",
        Mode = "caret"
    )

    response <- POST(ws_url, body = payload, encode = "json")
    # Surface HTTP 4xx/5xx as an R error instead of trying to parse an error
    # page as if it were a normal result.
    stop_for_status(response)

    parsed <- content(response)
    # print(parsed)
    results <- parsed$model$SQLGenrationResult
    if (length(results) == 0 || is.null(results[[1]]$SQL)) {
        stop("The SQL generation service returned no SQL for this model.",
             call. = FALSE)
    }

    results[[1]]$SQL
}

In [6]:
# Request the SQL translation of the fitted model and print it verbatim.
lModelSQL <- test_ws_sql_gen(model)
cat(lModelSQL)


WITH "CenteredDataForPCA" AS 
(SELECT "ADS"."KEY" AS "KEY", (CAST("ADS"."Feature_0" AS FLOAT) - 3.6135235573122526) / 8.60154510533249 AS "Feature_0", (CAST("ADS"."Feature_1" AS FLOAT) - 11.363636363636365) / 23.322452994515135 AS "Feature_1", (CAST("ADS"."Feature_2" AS FLOAT) - 11.13677865612648) / 6.860352940897585 AS "Feature_2", (CAST("ADS"."Feature_3" AS FLOAT) - 1.0691699604743083) / 0.2539940413404104 AS "Feature_3", (CAST("ADS"."Feature_4" AS FLOAT) - 0.5546950592885376) / 0.115877675667556 AS "Feature_4", (CAST("ADS"."Feature_5" AS FLOAT) - 6.284634387351779) / 0.7026171434153233 AS "Feature_5", (CAST("ADS"."Feature_6" AS FLOAT) - 68.57490118577077) / 28.148861406903617 AS "Feature_6", (CAST("ADS"."Feature_7" AS FLOAT) - 3.7950426877470353) / 2.105710126627611 AS "Feature_7", (CAST("ADS"."Feature_8" AS FLOAT) - 9.549407114624506) / 8.707259384239368 AS "Feature_8", (CAST("ADS"."Feature_9" AS FLOAT) - 408.2371541501977) / 168.537116054959 AS "Feature_9", (CAST("ADS"."Feature_10

# Execute the SQL Code

In [7]:
library(RODBC)
# Open the ODBC connection to the "pgsql" data source and enable autocommit.
# NOTE(review): user name and password are hard-coded here -- confirm this
# is a throwaway local test database.
conn <- odbcConnect("pgsql", uid = "db", pwd = "db", case = "nochange")
odbcSetAutoCommit(conn, autoCommit = TRUE)

In [8]:
# Copy of the predictors, renamed to the positional column names that the
# generated SQL refers to (Feature_0, Feature_1, ...).
df_sql <- dataset
# seq_len() stays empty for an empty data frame, whereas 0:(n - 1) and
# seq.int(n) would count backwards (0, -1 / 1, 0) in that case.
names(df_sql) <- sprintf("Feature_%d", seq_len(ncol(df_sql)) - 1L)
# 1-based row identifier; the generated SQL carries it through as "KEY".
df_sql$KEY <- seq_len(nrow(dataset))

# Recreate the input table. errors = FALSE is passed so that a failing drop
# (presumably: the table does not exist yet) does not stop the run.
sqlDrop(conn, "INPUT_DATA", errors = FALSE)
sqlSave(conn, df_sql, tablename = "INPUT_DATA", verbose = FALSE)

# df_sql

In [9]:
# Show the column names of the data frame that was written to INPUT_DATA.
colnames(df_sql)
# odbcGetInfo(conn)
# sqlTables(conn)

In [10]:
# Run the generated SQL against the database.
df_sql_out <- sqlQuery(conn, lModelSQL)

# sqlQuery() reports a failure by returning the error text as a character
# vector instead of a data frame. Stop here with that text rather than
# failing on `$KEY` below with an unrelated-looking message.
if (!is.data.frame(df_sql_out)) {
    stop("SQL execution failed: ", paste(df_sql_out, collapse = "\n"),
         call. = FALSE)
}

# First rows of the result, ordered by KEY.
head(df_sql_out[order(df_sql_out$KEY), ])

KEY,Estimator
1,29.80887
2,23.47075
3,32.29715
4,30.30063
5,28.91063
6,23.51118


In [11]:
# df_sql_out

# R Caret (earth) Output

In [12]:
# Predictions computed in R on the same predictor columns that were sent to
# the database.
estimator <- predict(model, dataset, type = "raw")

# One "Estimator" column plus the same 1-based KEY used for the SQL input.
df_r_out <- setNames(data.frame(estimator), "Estimator")
df_r_out$KEY <- seq.int(nrow(dataset))

head(df_r_out)


Estimator,KEY
29.80887,1
23.47075,2
32.29715,3
30.30063,4
28.91063,5
23.51118,6


# Compare R and SQL output

In [13]:
# Full outer join of the R and SQL predictions on KEY. After the merge,
# Estimator_1 is the R value and Estimator_2 the SQL value.
# (A stray empty argument between `all` and `suffixes` was removed.)
df_merge <- merge(
    x = df_r_out,
    y = df_sql_out,
    by = "KEY",
    all = TRUE,
    suffixes = c("_1", "_2")
)
head(df_merge)

KEY,Estimator_1,Estimator_2
1,29.80887,29.80887
2,23.47075,23.47075
3,32.29715,32.29715
4,30.30063,30.30063
5,28.91063,28.91063
6,23.51118,23.51118


In [14]:
# Signed and absolute difference between the two estimators for each KEY.
df_merge$Error <- with(df_merge, Estimator_1 - Estimator_2)
df_merge$AbsError <- abs(df_merge$Error)
head(df_merge)


KEY,Estimator_1,Estimator_2,Error,AbsError
1,29.80887,29.80887,-3.552714e-15,3.552714e-15
2,23.47075,23.47075,-3.552714e-15,3.552714e-15
3,32.29715,32.29715,0.0,0.0
4,30.30063,30.30063,-3.552714e-15,3.552714e-15
5,28.91063,28.91063,-3.552714e-15,3.552714e-15
6,23.51118,23.51118,0.0,0.0


In [15]:
# Rows where the two estimators differ by more than 1e-4. A KEY present on
# only one side of the outer merge has an NA AbsError; those rows are selected
# explicitly so they are listed with their real KEY. A bare NA in a logical
# row index would instead produce a row consisting only of NAs. The number of
# selected rows is the same either way.
df_merge_largest_errors <- df_merge[
    is.na(df_merge$AbsError) | df_merge$AbsError > 0.0001,
]
df_merge_largest_errors

KEY,Estimator_1,Estimator_2,Error,AbsError


In [16]:
# Number of rows flagged as mismatches (shown as the cell result).
nrow(df_merge_largest_errors)
# Raise an error if any row was flagged, i.e. if the two sets of
# predictions are not in agreement within the tolerance used above.
stopifnot(nrow(df_merge_largest_errors) == 0)


In [17]:
# Summary statistics of the result returned by the SQL query.
summary(df_sql_out)

      KEY          Estimator     
 Min.   :  1.0   Min.   : 3.415  
 1st Qu.:127.2   1st Qu.:16.874  
 Median :253.5   Median :21.244  
 Mean   :253.5   Mean   :22.533  
 3rd Qu.:379.8   3rd Qu.:26.059  
 Max.   :506.0   Max.   :53.933  

In [18]:
# Summary statistics of the predictions computed in R.
summary(df_r_out)

   Estimator           KEY       
 Min.   : 3.415   Min.   :  1.0  
 1st Qu.:16.874   1st Qu.:127.2  
 Median :21.244   Median :253.5  
 Mean   :22.533   Mean   :253.5  
 3rd Qu.:26.059   3rd Qu.:379.8  
 Max.   :53.933   Max.   :506.0  

In [19]:
# Summary statistics of the merged table, including Error and AbsError.
summary(df_merge)

      KEY         Estimator_1      Estimator_2         Error           
 Min.   :  1.0   Min.   : 3.415   Min.   : 3.415   Min.   :-2.132e-14  
 1st Qu.:127.2   1st Qu.:16.874   1st Qu.:16.874   1st Qu.:-3.553e-15  
 Median :253.5   Median :21.244   Median :21.244   Median : 0.000e+00  
 Mean   :253.5   Mean   :22.533   Mean   :22.533   Mean   :-1.918e-15  
 3rd Qu.:379.8   3rd Qu.:26.059   3rd Qu.:26.059   3rd Qu.: 0.000e+00  
 Max.   :506.0   Max.   :53.933   Max.   :53.933   Max.   : 1.066e-14  
    AbsError        
 Min.   :0.000e+00  
 1st Qu.:0.000e+00  
 Median :3.553e-15  
 Mean   :3.427e-15  
 3rd Qu.:3.553e-15  
 Max.   :2.132e-14  

In [20]:
# Display the fitted model object stored in the `finalModel` element.
model$finalModel

Selected 19 of 21 terms, and 7 of 9 predictors
Termination condition: Reached nk 21
Importance: PC1, PC4, PC3, PC5, PC8, PC6, PC7, PC2-unused, PC9-unused
Number of terms at each degree of interaction: 1 7 11
GCV 17.53049    RSS 7330.931    GRSq 0.7931609    RSq 0.8283809

In [21]:
# Display the `modelInfo` element of the trained object.
model$modelInfo

parameter,class,label
nprune,numeric,#Terms
degree,numeric,Product Degree


In [22]:
# Keep a shorter handle on the fitted model for the inspection cells below.
earth1 <- model$finalModel

In [23]:
# Coefficient of each selected term (intercept and hinge-function products).
earth1$coefficients

Unnamed: 0,y
(Intercept),17.3676548
h(PC1-2.7195),-3.1493074
h(2.7195-PC1),2.1086014
h(2.7195-PC1)*h(PC3-0.434215),1.2164292
h(2.7195-PC1)*h(0.434215-PC3),-0.9894268
h(2.7195-PC1)*h(PC5--0.856526),-0.4210254
h(2.7195-PC1)*h(PC4-0.107979),1.0042056
h(2.7195-PC1)*h(0.107979-PC4),-0.5858436
h(PC4-0.0386537),2.7610601
h(0.0386537-PC4),4.1187173


In [24]:
# Display `bx`: one column per selected term, one row per observation.
# NOTE(review): presumably the basis-function values on the training data.
earth1$bx

(Intercept),h(PC1-2.7195),h(2.7195-PC1),h(2.7195-PC1)*h(PC3-0.434215),h(2.7195-PC1)*h(0.434215-PC3),h(2.7195-PC1)*h(PC5--0.856526),h(2.7195-PC1)*h(PC4-0.107979),h(2.7195-PC1)*h(0.107979-PC4),h(PC4-0.0386537),h(0.0386537-PC4),h(0.499496-PC8),h(PC5--0.856526)*h(0.499496-PC8),h(-0.856526-PC5)*h(0.499496-PC8),h(PC6-0.0875545),h(0.0875545-PC6),h(PC3-0.0242175)*h(0.499496-PC8),h(0.0242175-PC3)*h(0.499496-PC8),h(0.0386537-PC4)*h(PC7--0.405524),h(0.0386537-PC4)*h(-0.405524-PC7)
1,0,4.815721,0,0.4411738,6.1601656,3.77029455,0.0000000,0.852238740,0.00000000,0.79503510,1.01699160,0.00000000,0.0000000,0.4025809,0.25312818,0.00000000,0.00000000,0
1,0,4.175309,0,4.7127834,2.7592284,1.58243392,0.0000000,0.448322958,0.00000000,0.27604695,0.18242401,0.00000000,0.1764075,0.0000000,0.00000000,0.19840309,0.00000000,0
1,0,4.792045,0,1.2807145,0.0000000,3.02135924,0.0000000,0.699819733,0.00000000,0.60455793,0.00000000,0.04660175,0.3600971,0.0000000,0.08629391,0.00000000,0.00000000,0
1,0,5.328420,0,2.8475066,0.0000000,1.25432519,0.0000000,0.304727767,0.00000000,0.75518351,0.00000000,0.18678514,0.5764379,0.0000000,0.00000000,0.09394672,0.00000000,0
1,0,5.175253,0,2.6367321,0.0000000,1.65352046,0.0000000,0.388830175,0.00000000,0.36510471,0.00000000,0.07606749,0.5288827,0.0000000,0.00000000,0.03632472,0.00000000,0
1,0,4.932160,0,5.4546294,1.1341925,0.33419164,0.0000000,0.137082585,0.00000000,0.60907750,0.14006260,0.00000000,0.4832998,0.0000000,0.00000000,0.42387766,0.00000000,0
1,0,4.077036,0,3.2854644,7.8630234,1.18136852,0.0000000,0.359086555,0.00000000,0.78594139,1.51577668,0.00000000,0.0000000,0.5459703,0.00000000,0.31111414,0.00000000,0
1,0,3.560710,0,3.3906600,7.9576541,1.52842774,0.0000000,0.498572929,0.00000000,0.00000000,0.00000000,0.00000000,0.0000000,0.6662397,0.00000000,0.00000000,0.00000000,0
1,0,2.899248,0,5.1679661,9.2906597,0.39922162,0.0000000,0.207023235,0.00000000,0.00000000,0.00000000,0.00000000,0.0000000,0.7119983,0.00000000,0.00000000,0.00000000,0
1,0,3.792620,0,3.7627794,8.6685754,1.02999246,0.0000000,0.340902989,0.00000000,0.04246197,0.09705290,0.00000000,0.0000000,0.5429801,0.00000000,0.02471858,0.00000000,0


In [25]:
# Display `cuts`: per term and per principal component, the knot value that
# appears in the term's hinge function (0 where the component is not used).
earth1$cuts

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9
(Intercept),0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0
h(PC1-2.7195),2.719498,0,0.0,0.0,0.0,0.0,0.0,0.0,0
h(2.7195-PC1),2.719498,0,0.0,0.0,0.0,0.0,0.0,0.0,0
h(2.7195-PC1)*h(PC3-0.434215),2.719498,0,0.43421484,0.0,0.0,0.0,0.0,0.0,0
h(2.7195-PC1)*h(0.434215-PC3),2.719498,0,0.43421484,0.0,0.0,0.0,0.0,0.0,0
h(2.7195-PC1)*h(PC5--0.856526),2.719498,0,0.0,0.0,-0.8565261,0.0,0.0,0.0,0
h(2.7195-PC1)*h(-0.856526-PC5),2.719498,0,0.0,0.0,-0.8565261,0.0,0.0,0.0,0
h(2.7195-PC1)*h(PC4-0.107979),2.719498,0,0.0,0.10797858,0.0,0.0,0.0,0.0,0
h(2.7195-PC1)*h(0.107979-PC4),2.719498,0,0.0,0.10797858,0.0,0.0,0.0,0.0,0
h(PC4-0.0386537),0.0,0,0.0,0.03865366,0.0,0.0,0.0,0.0,0


In [26]:
# Display `dirs`: per term and per principal component, 1 for h(PC - knot),
# -1 for h(knot - PC) and 0 where the component is not part of the term.
earth1$dirs

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9
(Intercept),0,0,0,0,0,0,0,0,0
h(PC1-2.7195),1,0,0,0,0,0,0,0,0
h(2.7195-PC1),-1,0,0,0,0,0,0,0,0
h(2.7195-PC1)*h(PC3-0.434215),-1,0,1,0,0,0,0,0,0
h(2.7195-PC1)*h(0.434215-PC3),-1,0,-1,0,0,0,0,0,0
h(2.7195-PC1)*h(PC5--0.856526),-1,0,0,0,1,0,0,0,0
h(2.7195-PC1)*h(-0.856526-PC5),-1,0,0,0,-1,0,0,0,0
h(2.7195-PC1)*h(PC4-0.107979),-1,0,0,1,0,0,0,0,0
h(2.7195-PC1)*h(0.107979-PC4),-1,0,0,-1,0,0,0,0,0
h(PC4-0.0386537),0,0,0,1,0,0,0,0,0
