# Time Series Classification Demo

In [129]:
using TSML
using TSMLextra
using DataFrames
using Distributed

## Let's add workers for parallel processing

In [130]:
# Add worker processes only when the session currently has a single process,
# then report the number of workers available.
if nprocs() == 1
    addprocs()
end
nworkers()

8

## Load TSML Modules and other Dependencies

In [131]:
# Load both TSML packages on every process in one statement.
@everywhere using TSML, TSMLextra
# Large COLUMNS value for dataframe column display.
ENV["COLUMNS"] = 1000;

## Prediction function

In [132]:
# predict(learner, data, train_ind, test_ind)
#
# Fit a preprocessing + `learner` pipeline on the rows of `data` selected by
# `train_ind`, predict the rows selected by `test_ind`, and score the result.
#
# The last column of `data` is used as the label; all preceding columns are
# used as features.
#
# Returns a tuple `(accuracy, pipeline)`: the value of `score(:accuracy, ...)`
# on the test rows, and the fitted pipeline object.
@everywhere function predict(learner,data,train_ind,test_ind)
    # Separate the label column (last) from the feature columns (all others)
    labels = convert(Array,data[:, end])
    features = convert(Matrix,data[:, 1:(end-1)])

    # Processing stages, applied in order; the learner comes last
    stages = [
        OneHotEncoder(),  # Encodes nominal features into numeric
        Imputer(),        # Imputes NA values
        StandardScaler(),
        learner           # Predicts labels on instances
    ]
    pipeline = Pipeline(Dict(:transformers => stages))

    # Fit on the training rows only
    fit!(pipeline, features[train_ind, :], labels[train_ind])

    # Predict the held-out rows and compare against their true labels
    predicted = transform!(pipeline, features[test_ind, :])
    accuracy = score(:accuracy, labels[test_ind], predicted)

    return accuracy,pipeline
end

## Run all models in parallel over several trials

In [133]:
# parallelmodel(learners::Dict, data::DataFrame; trials=5)
#
# Evaluate every learner in `learners` on `data` over `trials` independent
# holdout splits, distributing both the trials and the per-trial learners
# with `@distributed`.
#
# Each (learner, trial) pair contributes one `[model accuracy trial]` row; the
# rows are concatenated with `vcat`, converted to a DataFrame, and summarised.
#
# Returns a DataFrame with one row per model holding the `mean`, `std` and
# count `n` of its accuracies, sorted by `mean` in descending order.
function parallelmodel(learners::Dict,data::DataFrame;trials=5)
    names = collect(keys(learners))

    # Outer loop: one iteration per trial; per-trial row blocks are stacked by vcat
    rows = @distributed (vcat) for trial=1:trials
        # Reseed the RNG with a seed drawn from 1:100, then draw this trial's split
        Random.seed!(first(rand(1:100,1)))
        (train_ind, test_ind) = holdout(size(data, 1), 0.20)

        # Inner loop: one iteration per learner; its vcat result is the value
        # of the outer loop body
        @distributed (vcat) for name in names
            accuracy,_ = predict(learners[name],data,train_ind,test_ind)
            println("trial ",trial,", ",name," => ",round(accuracy))
            [name accuracy trial]
        end
    end

    # The matrix columns arrive as x1, x2, x3; give them meaningful names
    df = DataFrame(rows)
    rename!(df,:x1=>:model,:x2=>:acc,:x3=>:trial)

    # Per-model summary statistics of the accuracy column
    summarize = grp -> DataFrame(mean=mean(grp.acc),std=std(grp.acc),n=length(grp.acc))
    ranking = by(summarize, df, :model)

    # Best mean accuracy first
    sort!(ranking,:mean,rev=true)
    return ranking
end



parallelmodel (generic function with 1 method)

## Initialize ML models from Julia, Caret, and scikit-learn

In [134]:
# Define every learner as a global on all processes in a single @everywhere block.
@everywhere begin
    # Caret ML
    caret_svmlinear = CaretLearner(Dict(:learner=>"svmLinear"))
    caret_treebag = CaretLearner(Dict(:learner=>"treebag"))
    caret_rpart = CaretLearner(Dict(:learner=>"rpart"))
    caret_rf = CaretLearner(Dict(:learner=>"rf"))

    # ScikitLearn ML
    sk_ridge = SKLearner(Dict(:learner=>"RidgeClassifier"))
    sk_sgd = SKLearner(Dict(:learner=>"SGDClassifier"))
    sk_knn = SKLearner(Dict(:learner=>"KNeighborsClassifier"))
    sk_gb = SKLearner(Dict(:learner=>"GradientBoostingClassifier",:impl_args=>Dict(:n_estimators=>10)))
    sk_extratree = SKLearner(Dict(:learner=>"ExtraTreesClassifier",:impl_args=>Dict(:n_estimators=>10)))
    sk_rf = SKLearner(Dict(:learner=>"RandomForestClassifier",:impl_args=>Dict(:n_estimators=>10)))

    # Julia ML
    jrf = RandomForest(Dict(:impl_args=>Dict(:num_trees=>300)))
    jpt = PrunedTree()
    jada = Adaboost()

    # Julia Ensembles (built from the learners defined above)
    jvote_ens = VoteEnsemble(Dict(:learners=>[jrf,jpt,sk_gb,sk_extratree,caret_rf]))
    jstack_ada = StackEnsemble(Dict(:stacker=>Adaboost(),:learners=>[jrf,jpt,sk_gb,sk_extratree,caret_rf]))
    jstack_rf = StackEnsemble(Dict(:stacker=>RandomForest(),:learners=>[jrf,jpt,sk_gb,sk_extratree,caret_rf]))
    jbest_ens = BestLearner(Dict(:learners=>[jrf,sk_gb,caret_rf]))
    # Ensemble whose members include the ensembles above
    jsuper_ens = VoteEnsemble(Dict(:learners=>[jvote_ens,jstack_ada,jstack_rf,sk_gb,caret_rf]))
end

## Use iris dataset for evaluation

In [135]:
using RCall
# Evaluate `iris` in R and copy the result into a Julia object
iris = rcopy(R"iris")
# Preview the first five rows
first(iris, 5)

Unnamed: 0_level_0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Categorical…
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa


## Run the different learners in parallel

In [136]:
# Registry mapping a short name to each learner to be evaluated.
learners=Dict(
    :jvote_ens     => jvote_ens,
    :jstack_rf     => jstack_rf,
    :jbest_ens     => jbest_ens,
    :jstack_ada    => jstack_ada,
    :jrf           => jrf,
    :jada          => jada,
    :jsuper_ens    => jsuper_ens,
    :crt_rpart     => caret_rpart,
    :crt_svmlinear => caret_svmlinear,
    :crt_treebag   => caret_treebag,
    :crt_rf        => caret_rf,
    :skl_knn       => sk_knn,
    :skl_gb        => sk_gb,
    :skl_extratree => sk_extratree,
    :sk_rf         => sk_rf
);

In [149]:
# Evaluate every learner in `learners` on iris over 3 trials; the returned
# per-model summary table is bound to `df`.
df = parallelmodel(learners,iris;trials=3)







└ @ Base.Docs docs/Docs.jl:223
└ @ Base.Docs docs/Docs.jl:223
└ @ Base.Docs docs/Docs.jl:223
└ @ Base.Docs docs/Docs.jl:223
└ @ Base.Docs docs/Docs.jl:223
└ @ Base.Docs docs/Docs.jl:223
└ @ Base.Docs docs/Docs.jl:223
└ @ Base.Docs docs/Docs.jl:223


      From worker 3:	trial 2, jstack_ada => 93.0
      From worker 7:	trial 2, jada => 93.0
      From worker 9:	trial 2, skl_knn => 90.0
      From worker 9:	trial 3, skl_knn => 97.0
      From worker 9:	trial 1, skl_knn => 97.0
      From worker 2:	trial 1, jstack_ada => 97.0
      From worker 3:	trial 2, jrf => 93.0
      From worker 7:	trial 2, crt_svmlinear => 93.0
      From worker 7:	trial 3, jada => 100.0
      From worker 4:	trial 3, jstack_ada => 97.0
      From worker 4:	trial 2, crt_rf => 93.0
      From worker 7:	trial 3, crt_svmlinear => 97.0
      From worker 7:	trial 1, jada => 100.0
      From worker 4:	trial 2, crt_rpart => 30.0
      From worker 4:	trial 1, crt_rf => 97.0
      From worker 7:	trial 1, crt_svmlinear => 100.0
      From worker 6:	trial 2, jvote_ens => 93.0
      From worker 6:	trial 2, skl_gb => 93.0
      From worker 6:	trial 3, jvote_ens => 97.0
      From worker 6:	trial 3, skl_gb => 100.0
      From worker 6:	trial 1, jvote_ens => 97.0
      From w

Unnamed: 0_level_0,model,mean,std,n
Unnamed: 0_level_1,Any,Float64,Float64,Int64
1,jada,97.7778,3.849,3
2,jrf,96.6667,3.33333,3
3,jbest_ens,96.6667,3.33333,3
4,skl_extratree,96.6667,3.33333,3
5,crt_treebag,96.6667,3.33333,3
6,skl_gb,96.6667,3.33333,3
7,crt_svmlinear,96.6667,3.33333,3
8,jsuper_ens,96.6667,3.33333,3
9,sk_rf,96.6667,3.33333,3
10,jstack_ada,95.5556,1.9245,3


## Pick a learner, evaluate, and view its workflow

In [146]:
# Draw a fresh holdout split over the iris rows (0.20 is presumably the
# held-out fraction — confirm against `holdout`).
(train_ind, test_ind) = holdout(size(iris, 1), 0.20)
# Fit and score the :jstack_rf learner; `workflow` is the fitted pipeline.
res,workflow=predict(learners[:jstack_rf],iris,train_ind,test_ind)
# Show the accuracy score as the cell result.
res

93.33333333333333

In [147]:
# Display the structure of the fitted pipeline currently bound to `workflow`.
showtree(workflow)

pipeline
Array{Transformer,1}
├─ OneHotEncoder(Dict{Symbol,Any}(:nominal_column_values_map=>Dict{Int64,Any}(),:nominal_columns=>Int64[]), Dict(:nominal_column_values_map=>nothing,:nominal_columns=>nothing))
├─ Imputer(Dict(:strategy=>mean), Dict(:strategy=>mean))
├─ StandardScaler(Dict(:standardize_transform=>Standardize(4, [5.835, 3.085, 3.66, 1.14167], [1.15223, 2.27993, 0.551686, 1.29855])), Dict(:scale=>1,:center=>1))
└─ StackEnsemble(Dict{Symbol,Any}(:learners=>TSLearner[RandomForest(Ensemble of Decision Trees
   Trees:      300
   Avg Leaves: 3.85
   Avg Depth:  2.8066666666666666, Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Symbol,Real}(:max_depth=>-1,:num_subfeatures=>0,:num_trees=>300,:partial_sampling=>0.7))), PrunedTree(Decision Tree
   Leaves: 5
   Depth:  4, Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Symbol,Real}(:purity_threshold=>1.0,:min_samples_split=>2,:min_samples_leaf=>1,:min_purity_increase=>0.0,:max_depth=>-1))), SKLearner(PyObject GradientBoostingCla

## Pick another learner, evaluate, and view its workflow

In [140]:
# Draw a fresh holdout split over the iris rows (0.20 is presumably the
# held-out fraction — confirm against `holdout`).
(train_ind, test_ind) = holdout(size(iris, 1), 0.20)
# Fit and score the :jstack_ada learner; `workflow` is the fitted pipeline.
res,workflow=predict(learners[:jstack_ada],iris,train_ind,test_ind)
# Show the accuracy score as the cell result.
res

96.66666666666667

In [141]:
# Display the structure of the fitted pipeline currently bound to `workflow`.
showtree(workflow)

pipeline
Array{Transformer,1}
├─ OneHotEncoder(Dict{Symbol,Any}(:nominal_column_values_map=>Dict{Int64,Any}(),:nominal_columns=>Int64[]), Dict(:nominal_column_values_map=>nothing,:nominal_columns=>nothing))
├─ Imputer(Dict(:strategy=>mean), Dict(:strategy=>mean))
├─ StandardScaler(Dict(:standardize_transform=>Standardize(4, [5.81583, 3.03917, 3.75167, 1.19083], [1.23485, 2.24334, 0.582214, 1.34074])), Dict(:scale=>1,:center=>1))
└─ StackEnsemble(Dict{Symbol,Any}(:learners=>TSLearner[RandomForest(Ensemble of Decision Trees
   Trees:      300
   Avg Leaves: 4.443333333333333
   Avg Depth:  3.283333333333333, Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Symbol,Real}(:max_depth=>-1,:num_subfeatures=>0,:num_trees=>300,:partial_sampling=>0.7))), PrunedTree(Decision Tree
   Leaves: 5
   Depth:  4, Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Symbol,Real}(:purity_threshold=>1.0,:min_samples_split=>2,:min_samples_leaf=>1,:min_purity_increase=>0.0,:max_depth=>-1))), SKLearner(PyObject 

## Pick the super ensemble learner and view its structure

In [142]:
# Draw a fresh holdout split over the iris rows (0.20 is presumably the
# held-out fraction — confirm against `holdout`).
(train_ind, test_ind) = holdout(size(iris, 1), 0.20)
# Fit and score the :jsuper_ens learner; `workflow` is the fitted pipeline.
res,workflow=predict(learners[:jsuper_ens],iris,train_ind,test_ind)
# Show the accuracy score as the cell result.
res

100.0

In [143]:
# Display the structure of the fitted pipeline currently bound to `workflow`.
showtree(workflow)

pipeline
Array{Transformer,1}
├─ OneHotEncoder(Dict{Symbol,Any}(:nominal_column_values_map=>Dict{Int64,Any}(),:nominal_columns=>Int64[]), Dict(:nominal_column_values_map=>nothing,:nominal_columns=>nothing))
├─ Imputer(Dict(:strategy=>mean), Dict(:strategy=>mean))
├─ StandardScaler(Dict(:standardize_transform=>Standardize(4, [5.87333, 3.06083, 3.83, 1.24667], [1.24922, 2.39732, 0.568168, 1.2983])), Dict(:scale=>1,:center=>1))
└─ VoteEnsemble(Dict(:learners=>[VoteEnsemble(Dict(:learners=>[RandomForest(Ensemble of Decision Trees
   Trees:      300
   Avg Leaves: 3.34
   Avg Depth:  2.316666666666667, Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Symbol,Real}(:max_depth=>-1,:num_subfeatures=>0,:num_trees=>300,:partial_sampling=>0.7))), PrunedTree(Decision Tree
   Leaves: 4
   Depth:  3, Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Symbol,Real}(:purity_threshold=>1.0,:min_samples_split=>2,:min_samples_leaf=>1,:min_purity_increase=>0.0,:max_depth=>-1))), SKLearner(PyObject GradientB

   Trees:      7
   Avg Leaves: 2.0
   Avg Depth:  1.0,:coefficients=>[0.285272, 0.332118, 0.321616, 0.278327, 0.396149, 0.27051, 0.349147]), Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict(:num_iterations=>7))),:stacker_training_proportion=>0.3)), StackEnsemble(Dict{Symbol,Any}(:learners=>TSLearner[RandomForest(Ensemble of Decision Trees
   Trees:      300
   Avg Leaves: 3.34
   Avg Depth:  2.316666666666667, Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Symbol,Real}(:max_depth=>-1,:num_subfeatures=>0,:num_trees=>300,:partial_sampling=>0.7))), PrunedTree(Decision Tree
   Leaves: 4
   Depth:  3, Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Symbol,Real}(:purity_threshold=>1.0,:min_samples_split=>2,:min_samples_leaf=>1,:min_purity_increase=>0.0,:max_depth=>-1))), SKLearner(PyObject GradientBoostingClassifier(criterion='friedman_mse', init=None,
                              learning_rate=0.1, loss='deviance', max_depth=3,
                              max_features=None, max_le

                              validation_fraction=0.1, verbose=0,
                              warm_start=False), Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Any,Any}(:n_estimators=>10),:learner=>"GradientBoostingClassifier")), SKLearner(PyObject ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
                        max_depth=None, max_features='auto', max_leaf_nodes=None,
                        min_impurity_decrease=0.0, min_impurity_split=None,
                        min_samples_leaf=1, min_samples_split=2,
                        min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
                        oob_score=False, random_state=None, verbose=0,
                        warm_start=False), Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Any,Any}(:n_estimators=>10),:learner=>"ExtraTreesClassifier")), CaretLearner(RObject{VecSxp}
   Random Forest 
   
   120 samples
     4 predictor
     3 classes: 'setosa', 'versicolor', 'virginica' 


                              random_state=None, subsample=1.0, tol=0.0001,
                              validation_fraction=0.1, verbose=0,
                              warm_start=False), Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Any,Any}(:n_estimators=>10),:learner=>"GradientBoostingClassifier")), SKLearner(PyObject ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
                        max_depth=None, max_features='auto', max_leaf_nodes=None,
                        min_impurity_decrease=0.0, min_impurity_split=None,
                        min_samples_leaf=1, min_samples_split=2,
                        min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
                        oob_score=False, random_state=None, verbose=0,
                        warm_start=False), Dict{Symbol,Any}(:output=>:class,:impl_args=>Dict{Any,Any}(:n_estimators=>10),:learner=>"ExtraTreesClassifier")), CaretLearner(RObject{VecSxp}
   Random Forest 
   
   120 sa