In [96]:
# Launch worker processes only when the session has a single process
# (nprocs() == 1), so re-running this cell does not add more workers.
nprocs()==1 && addprocs()

@everywhere using Combine.Util: score, holdout,kfold
@everywhere using Combine.Transformers: fit!,Pipeline,OneHotEncoder,Imputer
@everywhere using Combine.Transformers.DecisionTreeWrapper: RandomForest,DecisionStumpAdaboost,PrunedTree
@everywhere using Combine.Transformers.ScikitLearnWrapper.SKLLearner
@everywhere using Combine.Transformers.EnsembleMethods
@everywhere import RDatasets

In [84]:
@everywhere using ScikitLearn
@everywhere @sk_import neighbors: KNeighborsClassifier
@everywhere @sk_import svm: SVC

# DecisionStumpAdaboost learner, defined on every process.
#   :output          - output to train against (:class)
#   :impl_options    - options specific to this implementation
#   :num_iterations  - number of boosting iterations
@everywhere adaLearner = DecisionStumpAdaboost(Dict(
    :output => :class,
    :impl_options => Dict(:num_iterations => 7)
))

# RandomForest learner with explicit implementation options, defined on
# every process. It is not referenced by the other cells visible here.
# NOTE(review): `nothing` for :num_subfeatures presumably selects the
# implementation's default, and 0.7 is presumably the per-tree sampling
# fraction - confirm against the Combine documentation.
@everywhere rfLearner = RandomForest(Dict(
    :output => :class,
    :impl_options => Dict(
        :num_subfeatures => nothing,
        :num_trees => 10,
        :partial_sampling => 0.7
    )
))

# scikit-learn backed learner, defined on every process. The estimator is
# chosen by name; "SVC" is active, and "KNeighborsClassifier" (also
# brought in with @sk_import) was the previously tried alternative.
@everywhere skLearner = SKLLearner(Dict(
    :output => :class,
    :learner => "SVC",
    :impl_options => Dict()
))

# BestLearner over a pruned tree, a random forest and the scikit-learn
# learner, defined on every process.
#   :partition_generator - calls `kfold` with the number of rows of the
#                          feature matrix and 5 (presumably the fold count)
#   :selection_function  - averages the score matrix along dimension 2 and
#                          returns the index of the largest mean
#                          (presumably one row per learner - confirm)
@everywhere bestLearner = BestLearner(Dict(
    :output => :class,
    :partition_generator => (features, labels) -> kfold(size(features, 1), 5),
    :selection_function => scores -> findmax(mean(scores, 2))[2],
    :score_type => Real,
    :learners => [PrunedTree(), RandomForest(), skLearner],
    :learner_options_grid => nothing
))

# VoteEnsemble defined on every process; its voting committee is a random
# forest, a pruned tree, the AdaBoost learner and the scikit-learn learner.
@everywhere voteLearner = VoteEnsemble(Dict(
    :output => :class,
    :learners => [RandomForest(), PrunedTree(), adaLearner, skLearner]
));



In [85]:
# Build a StackEnsemble, defined on every process.
#
# `treeprop` is passed through as :stacker_training_proportion, i.e. the
# proportion of the training set left to train the stacker itself. The
# stacker is a fresh RandomForest and the original features are not kept.
@everywhere function stackerLearner(treeprop)
    base_learners = [PrunedTree(), DecisionStumpAdaboost(), RandomForest(),
                     skLearner, voteLearner, bestLearner]
    return StackEnsemble(Dict(
        :output => :class,
        :learners => base_learners,
        :stacker => RandomForest(),
        :stacker_training_proportion => treeprop,
        :keep_original_features => false
    ))
end;



In [86]:
# Train `learner` on a random holdout split of the iris dataset and return
# the :accuracy score computed on the held-out rows.
#
# The learner is the last stage of a pipeline, after a OneHotEncoder and an
# Imputer. A StandardScaler stage is deliberately not included.
# NOTE(review): `transform!` is not among the names explicitly imported from
# Combine.Transformers at the top of the file; presumably it comes into
# scope through one of the other `using` lines - confirm.
@everywhere function processModel(learner)
    iris = RDatasets.dataset("datasets", "iris")
    feature_columns = [:SepalLength, :SepalWidth, :PetalLength, :PetalWidth]
    features = convert(Array, iris[feature_columns])
    labels = convert(Array, iris[:Species])

    # presumably 0.3 is the held-out fraction - confirm against `holdout`
    train_rows, test_rows = holdout(size(features, 1), 0.3)

    stages = [
        OneHotEncoder(),  # encodes nominal features into numeric ones
        Imputer(),        # imputes NA values
        learner           # predicts labels on instances
    ]
    pipeline = Pipeline(Dict(:transformers => stages))

    fit!(pipeline, features[train_rows, :], labels[train_rows])
    predicted = transform!(pipeline, features[test_rows, :])
    return score(:accuracy, labels[test_rows], predicted)
end



In [87]:
# Single trial: stack ensemble with a 0.9 stacker-training proportion.
learner = stackerLearner(0.9)
processModel(learner)

86.66666666666667

In [100]:
# For each stacker-training proportion, run 30 independent trials and
# collect one row [proportion  mean-score  score-std]; rows are stacked
# with vcat into `trpropTable`.
trpropTable = @parallel (vcat) for prop in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    trials = @parallel (vcat) for rep in 1:30
        processModel(stackerLearner(prop))
    end
    # @show prints the row and also returns it, so it doubles as the
    # loop body's value that the vcat reducer consumes.
    @show [prop mean(trials) std(trials)]
end

	From worker 4:	[prop mean(trials) std(trials)] = [0.6 95.4815 3.42695]
	From worker 3:	[prop mean(trials) std(trials)] = [0.4 94.5185 3.72459]
	From worker 5:	[prop mean(trials) std(trials)] = [0.8 95.7037 2.60551]
	From worker 2:	[prop mean(trials) std(trials)] = [0.1 87.5556 18.1156]
	From worker 5:	[prop mean(trials) std(trials)] = [0.9 92.8889 9.39915]
	From worker 3:	[prop mean(trials) std(trials)] = [0.5 95.2593 3.85931]
	From worker 4:	[prop mean(trials) std(trials)] = [0.7 94.2222 6.40881]
	From worker 2:	[prop mean(trials) std(trials)] = [0.2 93.8519 5.91362]
	From worker 2:	[prop mean(trials) std(trials)] = [0.3 95.5556 2.97571]


9×3 Array{Float64,2}:
 0.1  87.5556  18.1156 
 0.2  93.8519   5.91362
 0.3  95.5556   2.97571
 0.4  94.5185   3.72459
 0.5  95.2593   3.85931
 0.6  95.4815   3.42695
 0.7  94.2222   6.40881
 0.8  95.7037   2.60551
 0.9  92.8889   9.39915

In [98]:
using DataFrames
# Order the result rows by their second column (mean score), highest
# first, wrap them in a DataFrame and give the columns readable names.
sorted = DataFrame(sortrows(trpropTable, by = row -> row[2], rev = true));
rename!(sorted, Dict(:x1 => :Proportion, :x2 => :ACC, :x3 => :SD))

Unnamed: 0,Proportion,ACC,SD
1,0.3,95.55555555555556,2.7715980642769926
2,0.8,95.55555555555554,2.095131203515694
3,0.6,95.33333333333334,3.2203059435976504
4,0.9,94.88888888888889,5.029405984227544
5,0.5,94.66666666666666,2.608417312838372
6,0.4,94.44444444444444,4.829038818668625
7,0.7,94.2222222222222,4.823354250696211
8,0.2,85.55555555555556,21.57705431648816
9,0.1,81.77777777777779,12.86417383472143
