# Variable selection with a random forest, then AutoML on the selected
# variables.
#
# Expects `mds2` to already exist and to contain a FATALITIES column.
# NOTE(review): rename()/%>% presumably come from dplyr, ohse()/h2o_automl()
# from lares, and measure_importance()/important_variables() from
# randomForestExplainer -- confirm these are loaded earlier in the file.

# Remove the report number, report date and vehicle ID columns ----
mds2$REPORT_NUMBER <- NULL
mds2$REPORT_DATE <- NULL
mds2$VEHICLE_ID_NUMBER <- NULL

# FATALITIES becomes the modelling target under the name `tag`.
df <- mds2 %>%
  rename(tag = FATALITIES)

# Encode the data with ohse() before modelling.
data2 <- ohse(df)

# Seed the RNG so the 20% sample and the random forest are reproducible;
# the same seed value is passed to h2o_automl() below.
set.seed(123)

# Random 20% of the rows (without replacement) to keep the forest fast.
sample_size <- floor(0.20 * nrow(data2))
sample_rows <- sample(nrow(data2), size = sample_size, replace = FALSE)
b <- data2[sample_rows, , drop = FALSE]

# Using random forest for variable selection ----
rfModel <- randomForest(
  tag ~ .,
  data = b,
  do.trace = TRUE,
  importance = TRUE
)

# Top 10 variables (k = 10) according to the forest's importance measures.
feat_imp_df <- important_variables(measure_importance(rfModel), k = 10)
varImpPlot(rfModel)

# AutoML on the selected variables ----
h2o.init()

# Keep the selected predictors plus the target. Selecting `tag` by name keeps
# a clean column name; cbind(..., data2$tag) would name it "data2$tag".
data3 <- data2[, c(feat_imp_df, "tag"), drop = FALSE]

results <- h2o_automl(
  data3,
  y = "tag",
  project = "Arthur",
  max_time = 30,
  seed = 123,
  plots = TRUE
)

plot(results$plots$dashboard)
results$plots$importance