# Time Series Classification Demo

In [3]:
using TSML

┌ Info: Recompiling stale cache file /Users/ppalmes/.julia/compiled/v1.3/TSML/oqobt.ji for TSML [198dc43e-9e51-5cd7-9d40-d9794d335912]
└ @ Base loading.jl:1240


## Let's add workers for parallel processing

In [2]:
using Distributed
# Add local worker processes only if this session is still single-process,
# then report how many workers are available.
if nprocs() == 1
    addprocs()
end
nworkers()

8

## Load TSML Modules and other Dependencies

In [5]:
# @everywhere using TSML, TSMLextra, Plots
# @everywhere using TSML.TSMLTypes
# @everywhere using TSML: TSClassifier
# @everywhere using TSML.TSClassifiers.FileStats
# @everywhere using TSML.TSMLTransformers
# @everywhere using TSML.EnsembleMethods
# @everywhere using TSML.DecisionTreeLearners
# @everywhere using TSML.Utils
# @everywhere using TSMLextra.CaretLearners
# @everywhere using TSMLextra.SKLearners
# @everywhere using TSML.MLBaseWrapper

@everywhere using TSML
@everywhere using TSMLextra
@everywhere using DataFrames
@everywhere using Random
@everywhere using Statistics
@everywhere using StatsBase: iqr
@everywhere using RDatasets
ENV["COLUMNS"]=1000; # for dataframe column size

## Initialize ML models from Julia, Caret, and scikit-learn

In [6]:
# Every model below is created with @everywhere so that the same global name
# exists on the master process and on each worker.

# Caret ML: one CaretLearner per learner name given under :learner
# (presumably R caret method names reached through RCall — confirm).
# NOTE(review): caret_rpart and caret_rf are defined here but are commented
# out of the `learners` Dict used for the parallel run later in this file.
@everywhere caret_svmlinear = CaretLearner(Dict(:learner=>"svmLinear"))
@everywhere caret_treebag = CaretLearner(Dict(:learner=>"treebag"))
@everywhere caret_rpart = CaretLearner(Dict(:learner=>"rpart"))
@everywhere caret_rf = CaretLearner(Dict(:learner=>"rf"))

# ScikitLearn ML: one SKLearner per classifier name given under :learner.
# The three tree-based models also pass :impl_args with n_estimators set to 10.
@everywhere sk_ridge = SKLearner(Dict(:learner=>"RidgeClassifier"))
@everywhere sk_sgd = SKLearner(Dict(:learner=>"SGDClassifier"))
@everywhere sk_knn = SKLearner(Dict(:learner=>"KNeighborsClassifier"))
@everywhere sk_gb = SKLearner(Dict(:learner=>"GradientBoostingClassifier",:impl_args=>Dict(:n_estimators=>10)))
@everywhere sk_extratree = SKLearner(Dict(:learner=>"ExtraTreesClassifier",:impl_args=>Dict(:n_estimators=>10)))
@everywhere sk_rf = SKLearner(Dict(:learner=>"RandomForestClassifier",:impl_args=>Dict(:n_estimators=>10)))

# Julia ML: RandomForest is configured with num_trees set to 300;
# PrunedTree and Adaboost use their default arguments.
@everywhere jrf = RandomForest(Dict(:impl_args=>Dict(:num_trees=>300)))
@everywhere jpt = PrunedTree()
@everywhere jada = Adaboost()

# Julia Ensembles: each ensemble receives a list of the learner objects
# defined above; jsuper_ens is a VoteEnsemble whose members include the
# other three ensembles.
# NOTE(review): the same learner objects (e.g. sk_rf, sk_gb) are passed to
# several ensembles — confirm the ensembles do not share fitted state.
@everywhere jvote_ens=VoteEnsemble(Dict(:learners=>[jrf,jpt,sk_gb,sk_extratree,sk_rf]))
@everywhere jstack_ens=StackEnsemble(Dict(:learners=>[jrf,jpt,sk_gb,sk_extratree,sk_rf]))
@everywhere jbest_ens=BestLearner(Dict(:learners=>[jrf,sk_gb,sk_rf]))
@everywhere jsuper_ens=VoteEnsemble(Dict(:learners=>[jvote_ens,jstack_ens,jbest_ens,sk_rf,sk_gb]))

│ Loading required package: ggplot2
└ @ RCall /Users/ppalmes/.julia/packages/RCall/iojZI/src/io.jl:113
│ Type rfNews() to see new features/changes/bug fixes.
│ 
│ Attaching package: ‘randomForest’
│ 
│ The following object is masked from ‘package:ggplot2’:
│ 
│     margin
│ 
└ @ RCall /Users/ppalmes/.julia/packages/RCall/iojZI/src/io.jl:113
│ Loading required package: ggplot2
└ @ RCall ~/.julia/packages/RCall/iojZI/src/io.jl:113
│ Type rfNews() to see new features/changes/bug fixes.
│ 
│ Attaching package: ‘randomForest’
│ 
│ The following object is masked from ‘package:ggplot2’:
│ 
│     margin
│ 
└ @ RCall ~/.julia/packages/RCall/iojZI/src/io.jl:113
│ Loading required package: ggplot2
└ @ RCall ~/.julia/packages/RCall/iojZI/src/io.jl:113
│ Loading required package: ggplot2
└ @ RCall ~/.julia/packages/RCall/iojZI/src/io.jl:113
│ Loading required package: ggplot2
└ @ RCall ~/.julia/packages/RCall/iojZI/src/io.jl:113
│ Loading required package: ggplot2
└ @ RCall ~/.julia/packages/RCall/

## Functions for feature extraction and prediction

In [7]:
# predict(learner, data, train_ind, test_ind)
#
# Fit a preprocessing + `learner` pipeline on the `train_ind` rows of `data`
# and return the accuracy score obtained on the `test_ind` rows.
#
# Arguments
# - `learner`:   final pipeline stage; it predicts the labels.
# - `data`:      table whose last column holds the labels and whose remaining
#                columns are the features.
# - `train_ind`: row indices used for fitting.
# - `test_ind`:  row indices used for scoring.
#
# Returns the value of `score(:accuracy, ...)` for the test rows.
@everywhere function predict(learner, data, train_ind, test_ind)
    # `Matrix(df)` is used instead of `convert(Matrix, df)`: the `convert`
    # method is deprecated and absent from newer DataFrames releases, while
    # the constructor is available in old and new releases alike.
    features = Matrix(data[:, 1:(end - 1)])
    labels = convert(Array, data[:, end])

    # Preprocessing stages followed by the learner under evaluation
    pipeline = Pipeline(
        Dict(
            :transformers => [
                OneHotEncoder(),   # Encodes nominal features into numeric
                Imputer(),         # Imputes NA values
                StandardScaler(),
                learner            # Predicts labels on instances
            ]
        )
    )

    # Train on the training rows only
    fit!(pipeline, features[train_ind, :], labels[train_ind])
    # Predict the held-out rows
    predictions = transform!(pipeline, features[test_ind, :])
    # Assess predictions against the true held-out labels
    return score(:accuracy, labels[test_ind], predictions)
end

## Data processing and feature extraction

In [8]:
# extract_features_from_timeseries(datadir)
#
# Call `getstats(datadir)` to obtain the statistics table, store its `dtype`
# column as an `Array{String}`, and return columns 3 through the
# second-to-last of that table.
@everywhere function extract_features_from_timeseries(datadir)
    println("*** Extracting features ***")
    stats = getstats(datadir)
    # Re-store the dtype column as a plain String array
    stats[!, :dtype] = Array{String}(stats[!, :dtype])
    # Drop the first two columns and the last one
    keep = 3:(size(stats, 2) - 1)
    return stats[!, keep]
end

# Directory containing the time-series files to summarise
datadir = "data/"
# One row of extracted features per file
tsdata = extract_features_from_timeseries(datadir)
# Preview the first five rows
first(tsdata, 5)

*** Extracting features ***
getting stats of AirOffTemp1.csv
getting stats of AirOffTemp2.csv
getting stats of AirOffTemp3.csv
getting stats of AirOffTemp4.csv
getting stats of AirOffTemp5.csv
getting stats of Energy1.csv
getting stats of Energy2.csv
getting stats of Energy3.csv
getting stats of Energy4.csv
getting stats of Energy5.csv
getting stats of Pressure1.csv
getting stats of Pressure3.csv
getting stats of Pressure4.csv
getting stats of Pressure5.csv
getting stats of Pressure6.csv
getting stats of RetTemp11.csv
getting stats of RetTemp21.csv
getting stats of RetTemp31.csv
getting stats of RetTemp41.csv
getting stats of RetTemp51.csv


Unnamed: 0_level_0,sfreq,count,max,min,median,mean,q1,q2,q25,q75,q8,q9,kurtosis,skewness,variation,entropy,autocor,pacf,dtype
Unnamed: 0_level_1,Float64,Int64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,String
1,0.998658,745,8.9,3.1,4.7,5.22651,4.0,4.2,4.3,6.2,6.5,7.2,-0.503619,0.743989,0.248423,-6554.49,2.88221,1.18164,AirOffTemp
2,0.998658,745,5.2,2.0,2.8,2.9794,2.6,2.7,2.7,3.2,3.2,3.7,1.32258,1.00464,0.14924,-2447.02,1.80057,0.921901,AirOffTemp
3,0.998658,745,2.0,0.2,0.95,0.895235,0.4,0.5,0.5,1.1,1.3,1.4,-0.849288,0.0579184,0.433059,6.90205,0.213072,0.213816,AirOffTemp
4,0.998658,745,6.0,2.15,3.2,3.38745,2.7,2.7,3.0,3.7,3.7,4.2,1.24161,0.984734,0.196541,-3125.67,1.44612,0.679828,AirOffTemp
5,0.998658,745,9.0,3.7,5.7,6.00215,4.7,5.0,5.2,7.0,7.4,7.9,-0.757307,0.594082,0.200445,-8101.0,2.47319,1.11812,AirOffTemp


## Run all models in parallel over several trials

In [9]:
# parallelmodel(learners::Dict, data::DataFrame; trials=5)
#
# Evaluate every learner in `learners` on `data` over `trials` independent
# train/test splits, distributing both the trials and the per-trial learners
# with `@distributed`, and summarise the accuracies per model.
#
# Arguments
# - `learners`: Dict mapping a model label to a learner object.
# - `data`:     feature table passed unchanged to `predict`.
# - `trials`:   number of splits to evaluate (must be >= 1).
#
# Returns a DataFrame with one row per model and the columns `model`,
# `mean`, `std` and `n`, sorted by descending `mean`.
#
# Throws `ArgumentError` when `learners` is empty or `trials < 1`; both cases
# would otherwise fail inside the distributed reduction with an obscure
# empty-collection error.
function parallelmodel(learners::Dict, data::DataFrame; trials=5)
    trials >= 1 || throw(ArgumentError("trials must be at least 1, got $trials"))
    isempty(learners) && throw(ArgumentError("learners must not be empty"))

    models = collect(keys(learners))
    ctable = @distributed (vcat) for i in 1:trials
        # Split into training and test sets; the seed is derived from the
        # trial number so each trial draws its own split.
        # NOTE(review): 0.20 is presumably the held-out fraction — confirm
        # against `holdout`.
        Random.seed!(3i)
        (train_ind, test_ind) = holdout(size(data, 1), 0.20)
        acc = @distributed (vcat) for model in models
            res = predict(learners[model], data, train_ind, test_ind)
            println("trial ", i, ", ", model, " => ", round(res))
            [model res i]   # one result row: label, accuracy, trial number
        end
        acc
    end

    # Name the columns directly instead of relying on the auto-generated
    # x1/x2/x3 names of the positional Matrix constructor, which newer
    # DataFrames releases reject without an explicit `:auto` argument.
    df = DataFrame(model=ctable[:, 1], acc=ctable[:, 2], trial=ctable[:, 3])

    # `combine(groupby(...))` is the supported replacement for `by(...)`,
    # which was deprecated and later removed from DataFrames.
    gp = combine(groupby(df, :model)) do x
        DataFrame(mean=mean(x.acc), std=std(x.acc), n=length(x.acc))
    end
    sort!(gp, :mean, rev=true)
    return gp
end

parallelmodel (generic function with 1 method)

In [10]:
# Models to compare, keyed by the label that appears in the printed results.
# Entries left commented out are excluded from the run.
learners = Dict(
    :jvote_ens     => jvote_ens,
    :jstack_ens    => jstack_ens,
    :jbest_ens     => jbest_ens,
    :jrf           => jrf,
    :jada          => jada,
    :jsuper_ens    => jsuper_ens,
    # :crt_rpart   => caret_rpart,
    :crt_svmlinear => caret_svmlinear,
    :crt_treebag   => caret_treebag,
    # :crt_rf      => caret_rf,
    :skl_knn       => sk_knn,
    :skl_gb        => sk_gb,
    :skl_extratree => sk_extratree,
    :sk_rf         => sk_rf
)

# Three trials per model; the summary table is the cell's displayed value
df = parallelmodel(learners, tsdata; trials=3)

      From worker 2:	trial 2, skl_extratree => 75.0
      From worker 3:	trial 1, skl_extratree => 100.0


│   caller = sexp(::Type{RCall.RClass{:list}}, ::DataFrame) at dataframe.jl:25
└ @ RCall ~/.julia/packages/RCall/iojZI/src/convert/dataframe.jl:25
│   caller = sexp(::Type{RCall.RClass{:list}}, ::DataFrame) at dataframe.jl:25
└ @ RCall ~/.julia/packages/RCall/iojZI/src/convert/dataframe.jl:25


      From worker 2:	trial 3, skl_extratree => 50.0
      From worker 3:	trial 1, crt_treebag => 100.0
      From worker 2:	trial 3, crt_treebag => 50.0
      From worker 2:	trial 2, crt_treebag => 100.0
      From worker 5:	trial 3, skl_gb => 25.0
      From worker 9:	trial 3, skl_knn => 50.0
      From worker 8:	trial 3, sk_rf => 75.0
      From worker 9:	trial 1, skl_knn => 75.0
      From worker 9:	trial 2, skl_knn => 75.0
      From worker 8:	trial 1, sk_rf => 75.0
      From worker 8:	trial 2, sk_rf => 100.0


│   caller = sexp(::Type{RCall.RClass{:list}}, ::DataFrame) at dataframe.jl:25
└ @ RCall ~/.julia/packages/RCall/iojZI/src/convert/dataframe.jl:25


      From worker 5:	trial 3, jada => 50.0
      From worker 5:	trial 1, skl_gb => 75.0
      From worker 5:	trial 1, jada => 25.0
      From worker 5:	trial 2, skl_gb => 75.0
      From worker 5:	trial 2, jada => 50.0
      From worker 2:	trial 1, jrf => 75.0
      From worker 3:	trial 2, jrf => 100.0
      From worker 6:	trial 2, crt_svmlinear => 75.0
      From worker 6:	trial 1, crt_svmlinear => 100.0
      From worker 6:	trial 3, crt_svmlinear => 75.0
      From worker 4:	trial 3, jrf => 25.0
      From worker 3:	trial 3, jstack_ens => 0.0
      From worker 4:	trial 1, jstack_ens => 0.0
      From worker 4:	trial 2, jstack_ens => 50.0
      From worker 3:	trial 3, jvote_ens => 50.0
      From worker 4:	trial 1, jvote_ens => 75.0
      From worker 4:	trial 2, jvote_ens => 75.0
      From worker 2:	trial 1, jbest_ens => 75.0
      From worker 4:	trial 3, jbest_ens => 25.0
      From worker 3:	trial 2, jbest_ens => 75.0
      From worker 7:	trial 3, jsuper_ens => 50.0
      From work

Unnamed: 0_level_0,model,mean,std,n
Unnamed: 0_level_1,Any,Float64,Float64,Int64
1,crt_treebag,83.3333,28.8675,3
2,crt_svmlinear,83.3333,14.4338,3
3,sk_rf,83.3333,14.4338,3
4,skl_extratree,75.0,25.0,3
5,jsuper_ens,75.0,25.0,3
6,jrf,66.6667,38.1881,3
7,jvote_ens,66.6667,14.4338,3
8,skl_knn,66.6667,14.4338,3
9,jbest_ens,58.3333,28.8675,3
10,skl_gb,58.3333,28.8675,3


In [6]:
using Base.Threads
nthreads()

8

In [10]:
using TSML
using TSMLextra
using StatsBase: iqr
using RDatasets
ENV["COLUMNS"]=1000; # for dataframe column size

┌ Info: Precompiling TSMLextra [0c7047ce-818d-11e9-1109-0323cd70e08d]
└ @ Base loading.jl:1242
│ This may mean TSML [198dc43e-9e51-5cd7-9d40-d9794d335912] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:1000
┌ Info: Skipping precompilation since __precompile__(false). Importing TSMLextra [0c7047ce-818d-11e9-1109-0323cd70e08d].
└ @ Base loading.jl:1017
┌ Info: Precompiling RCall [6f49c342-dc21-5d91-9882-a32aef131414]
└ @ Base loading.jl:1242
│ This may mean DataFrames [a93c6f00-e57d-5684-b7b6-d8193f3e46c0] does not support precompilation but is imported by a module that does.
└ @ Base loading.jl:1000
┌ Info: Skipping precompilation since __precompile__(false). Importing RCall [6f49c342-dc21-5d91-9882-a32aef131414].
└ @ Base loading.jl:1017


LoadError: LoadError: LoadError: LoadError: StackOverflowError:
in expression starting at /Users/ppalmes/.julia/packages/RCall/g7dhB/src/convert/missing.jl:2
in expression starting at /Users/ppalmes/.julia/packages/RCall/g7dhB/src/RCall.jl:44
in expression starting at /Users/ppalmes/.julia/packages/TSMLextra/3T9eY/src/system.jl:8