In [1]:
%%bash
# Build the project's standalone uberjar with Leiningen (compiles every mtdm namespace).
lein uberjar

Compiling mtdm.classifier.base
Compiling mtdm.classifier.distributed.aggregation
Compiling mtdm.classifier.distributed.base
Compiling mtdm.classifier.distributed.distributed
Compiling mtdm.classifier.distributed.dynamic-distributed
Compiling mtdm.classifier.distributed.dynamic-monitors
Compiling mtdm.classifier.distributed.sites
Compiling mtdm.classifier.moa-classifier
Compiling mtdm.classifier.random
Compiling mtdm.core
Compiling mtdm.dataset.base
Compiling mtdm.dataset.csv-dataset
Compiling mtdm.dataset.moa
Compiling mtdm.dataset.probabilistic
Compiling mtdm.dataset.save-csv
Compiling mtdm.dataset.tree-based
Compiling mtdm.evaluation.timing
Compiling mtdm.evaluation
Compiling mtdm.fourier.base
Compiling mtdm.fourier.trouble
Compiling mtdm.monitor.accuracy
Compiling mtdm.monitor.agreement
Compiling mtdm.monitor.base
Compiling mtdm.monitor.drift
Compiling mtdm.monitor.threshold
Compiling mtdm.monitor.window
Compiling mtdm.training
Compiling mtdm.trees.base
Compiling mtdm.trees.graph-tr

In [2]:
%classpath add jar ../target/jvm/uberjar/mtdm-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.java.io :as io]
         '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [run-and-save-experiments load-experiment site-summary-table experiment-timeline experiments-summary-table
                                   display-html confusion-summary display-experiment-models get-best-experiment-label load-experiment-pair
                                   get-timing-evaluation timing-table get-experiment-block-accuracies experiment-block-accuracies-table
                                   save-data load-data]]
         '[mtdm.evaluation :refer [get-order-summary]]
         '[mtdm.classifier.distributed.sites :refer [make-site-structure p-site t-site]]
         '[mtdm.classifier.moa-classifier :refer [adaptive-random-forest naive-bayes]]
         '[mtdm.dataset.csv-dataset :refer [read-csv-dataset]]
         '[mtdm.utils.stats :refer [mann-whitney-u-test wilcoxon-signed-rank-test]])

null

## Experiment Setup

In [3]:
;; Dataset Configuration

;; Loads the interleaved sensorless-drive CSV through read-csv-dataset,
;; naming the 48 numeric columns feature_0 .. feature_47.
(defn dataset-fn []
    (let [numeric-features (mapv #(str "feature_" %) (range 48))]
        (read-csv-dataset "datasets/sensorless-drive/sensorless-drive-interleaved.csv"
                          999999 ;; All records (presumably a record cap larger than the file)
                          numeric-features)))

;; Same CSV and feature columns as the full dataset loader, but called with
;; 50000 instead of 999999 as the second argument.
(defn dataset-fn1 []
    (let [numeric-features (mapv #(str "feature_" %) (range 48))]
        (read-csv-dataset "datasets/sensorless-drive/sensorless-drive-interleaved.csv"
                          50000 ;; Presumably a cap of 50000 records, i.e. a subset rather than all records -- TODO confirm
                          numeric-features)))

;; Name used for the experiment labels and the output directory.
(def dataset-name "sensorless-drive")

(def feature-count 48)    ;; Not including class
(def features-per-site 2) ;; Each p-site below is given two feature indices

;; Trouble factors are multiples (1x, 1.5x, 2x) of features-per-site.
;; Note the 1.5 multiplier yields a double (3.0) while the others stay integers.
(def trouble-factors
  (for [multiplier [1 1.5 2]]
    (* features-per-site multiplier)))
;; 24 p-sites named :set1 .. :set24. Site k (1-based) receives the index pair
;; [k-1, k+23] -- presumably feature indices, so each of the first 24 features
;; is paired with its counterpart 24 positions later.
(def p-sites
  (let [site-count 24]
    (mapv (fn [i]
            (p-site (keyword (str "set" (inc i)))
                    [i (+ i site-count)]))
          (range site-count))))

;; Load each dataset once, then rebind its loader to return the stored value so
;; later calls to dataset-fn / dataset-fn1 no longer call read-csv-dataset.
;; Order matters: each def must run before the loader it uses is rebound.
(def dataset (dataset-fn))
(defn dataset-fn [] dataset)

(def dataset1 (dataset-fn1))
(defn dataset-fn1 [] dataset1)

#'beaker_clojure_shell_eaf5d33d-f855-4143-8777-6f984177b053/dataset-fn1

In [4]:
;; Experiment Configuration

;; Classifiers referred from mtdm.classifier.moa-classifier: naive Bayes is
;; handed to run-and-save-experiments, the adaptive random forest is used for
;; the :trouble-classifier entry and the stacked aggregation rule.
(def base-classifier naive-bayes)
(def trouble-classifier adaptive-random-forest)

;; Settings common to every experiment; :label, :p-site-aggregation-rule and
;; :system-config are assoc'd on per experiment.
(def base-setup
    (let [site-structure (apply make-site-structure feature-count p-sites)]
        {;; Data sources
         :dataset-description (keyword dataset-name)
         :dataset-fn dataset-fn
         :dataset-fn1 dataset-fn1
         :min-records 50000
         :selection-records 0
         ;; Classifier and site layout
         :trouble-classifier trouble-classifier
         :base-site-structure site-structure
         :grace-period 5
         :split-confidence 0.0001
         ;; Disable monitor logging because this is a long-running experiment
         ;; where detailed logging data is excessively large.
         :disable-monitor-logging true}))

;; Aggregation rules keyed by short name. array-map spells out the entry
;; order, which is the order the experiment comprehension iterates in.
(def aggregation-rules
    (array-map
        :max-conf      {:type :max-conf}
        :simple-voting {:type :simple-voting}
        :stacked       {:type :two-level-stacked
                        :classifier-generator trouble-classifier}))

;; System configuration shared by all experiments, assembled from three
;; groups of keys (site-*, creation-*, removal-*). Each experiment adds its
;; own :trouble-factor on top of this map.
(def base-system-config
    (merge
        ;; site-* / source settings
        {:site-window-size 1000
         :site-training-time 0
         :shared-sources? false}
        ;; creation-* settings
        {:creation-window-size 1000
         :creation-time-threshold 500
         :creation-agreement-threshold {:type :smoothed-hoeffding-bound
                                        :r 1 :delta 0.001 :sharpness 5}}
        ;; removal-* settings
        {:removal-window-size 1000
         :removal-time-threshold 500
         :removal-accuracy-threshold {:type :hoeffding-bound
                                      :r 1 :delta 0.001}
         :removal-usage-threshold 0.05}))


;; One experiment per (trouble factor, aggregation rule) combination, with the
;; trouble factor varying slowest. The label is built with `str`, so the rule
;; key keeps its leading colon (e.g. :sensorless-drive-mtdm:max-conf-tf2).
(def mtdm-experiments
    (for [tf trouble-factors
          [rule-key rule] aggregation-rules
          :let [label (keyword (str dataset-name "-mtdm" rule-key "-tf" tf))
                system-config (assoc base-system-config :trouble-factor tf)]]
        (assoc base-setup
            :label label
            :p-site-aggregation-rule rule
            :system-config system-config)))

;; The experiments that will be run.
(def experiments mtdm-experiments)

;; Directory name shared by the run, load and timing cells.
(def output-dir (str "workspace/" dataset-name))

#'beaker_clojure_shell_eaf5d33d-f855-4143-8777-6f984177b053/output-dir

## Run Experiments

In [5]:
;; Create a BeakerX Output widget; it is passed to run-and-save-experiments as :beaker-output.
(import '[com.twosigma.beakerx.widget Output])
(def experiment-output (Output.))
;; The bare var is the cell's result (presumably so the notebook renders the widget here).
experiment-output

In [6]:
;; Run all configured experiments with base-classifier, passing output-dir as the
;; destination (presumably where results are saved -- confirm in jupyter_helpers.clj).
;; :beaker-output receives the Output widget; :thread-count 1 requests a single thread.
(run-and-save-experiments output-dir base-classifier experiments
                          :beaker-output experiment-output
                          :thread-count 1)

time-tree 0.872
time-parse 0.1294
time-tsite 13118.995899999998
time-tree 0.4452
time-parse 0.1107
time-tsite 14426.564199999966
time-tree 0.3394
time-parse 0.0574
time-tsite 13692.06039999999


[{:full {:mean-max-transmission-100 10670024776931001029/215029451467965086568, :full-comm 46453/1773, :proportion-transmitted 389155/351054, :accuracy 236/297, :time-cal 1240979564061, :mean-critical-path-time 9514886704/58509, :total-communication 46453/1773, :time-update-agree 178331.578199999, :time-tsite 13118.995899999998, :f-score {:unweighted-mean 0.7971351471814242, :weighted-mean 0.7971351471814242, :micro 0.79461277}, :time-wall-pairs 9266300, :prim-comm 400000/19503, :time-func 1.3172, :time-cpu-pairs 8871335, :total-pairs 193}, :label :sensorless-drive-mtdm:max-conf-tf2, :skip-1000 {:mean-max-transmission-100 10670024776931001029/211354299756810134568, :full-comm 1532949/57509, :proportion-transmitted 389155/345054, :accuracy 45531/57509, :time-cal 1240979564061, :mean-critical-path-time 9497848924/57509, :total-communication 1532949/57509, :time-update-agree 177880.981599999, :time-tsite nil, :f-score {:unweighted-mean 0.7942811250686646, :weighted-mean 0.7943046868120026

## Experiments Summary

In [7]:
;; Load the :summary entry for output-dir and keep it for the cells below.
(def experiments-summary
    (load-experiment output-dir :summary))

;; Pass the loaded summary to experiments-summary-table as the cell's result.
(experiments-summary-table experiments-summary)

## Accuracy and Transmission Over Time 

In [8]:
;; Key path into an experiment summary: the summaries printed by the run cell
;; nest :accuracy under :skip-1000.
(def performance-measure [:skip-1000 :accuracy])
;; Passed as :partition-size to experiment-timeline in the plotting cells.
(def partition-size 100)

#'beaker_clojure_shell_eaf5d33d-f855-4143-8777-6f984177b053/partition-size

### Maximum Confidence Aggregation

In [9]:
;; Load the experiment pair for the :max-conf aggregation rule. performance-measure is
;; presumably used to choose the best run -- confirm in jupyter_helpers.clj.
(def max-conf-experiments (load-experiment-pair output-dir experiments-summary performance-measure :max-conf))

#'beaker_clojure_shell_eaf5d33d-f855-4143-8777-6f984177b053/max-conf-experiments

In [10]:
;; Timeline for the max-confidence pair: the :mtdm entry is passed as the first
;; argument, the rest of the pair as the second, and the x axis spans the
;; number of :results in the :mtdm entry.
(let [{mtdm-run :mtdm :as experiment-pair} max-conf-experiments
      other-runs (dissoc experiment-pair :mtdm)
      record-total (count (:results mtdm-run))]
    (-> (experiment-timeline mtdm-run
                             other-runs
                             :partition-size partition-size
                             :x-bounds [0 record-total]
                             :y-bounds-acc [0 1.2]
                             :y-bounds-data [0 1.5]
                             :y-bounds-agree [0 300]
                             :event-rows 6)
        (.display)))

null

### Simple Voting Aggregation

In [None]:
;; Load the experiment pair for the :simple-voting aggregation rule. performance-measure is
;; presumably used to choose the best run -- confirm in jupyter_helpers.clj.
(def voting-experiments (load-experiment-pair output-dir experiments-summary performance-measure :simple-voting))

In [None]:
;; Timeline for the simple-voting pair: the :mtdm entry is passed as the first
;; argument, the rest of the pair as the second, and the x axis spans the
;; number of :results in the :mtdm entry.
(let [{mtdm-run :mtdm :as experiment-pair} voting-experiments
      other-runs (dissoc experiment-pair :mtdm)
      record-total (count (:results mtdm-run))]
    (-> (experiment-timeline mtdm-run
                             other-runs
                             :partition-size partition-size
                             :x-bounds [0 record-total]
                             :y-bounds-acc [0 1.5]
                             :y-bounds-data [0 1.5]
                             :y-bounds-agree [0 300]
                             :event-rows 6)
        (.display)))

### Stacked Aggregation

In [None]:
;; Load the experiment pair for the :stacked aggregation rule. performance-measure is
;; presumably used to choose the best run -- confirm in jupyter_helpers.clj.
(def stacked-experiments (load-experiment-pair output-dir experiments-summary performance-measure :stacked))

In [None]:
;; Timeline for the stacked pair: the :mtdm entry is passed as the first
;; argument, the rest of the pair as the second, and the x axis spans the
;; number of :results in the :mtdm entry. The data axis is capped at 0.7 here.
(let [{mtdm-run :mtdm :as experiment-pair} stacked-experiments
      other-runs (dissoc experiment-pair :mtdm)
      record-total (count (:results mtdm-run))]
    (-> (experiment-timeline mtdm-run
                             other-runs
                             :partition-size partition-size
                             :x-bounds [0 record-total]
                             :y-bounds-acc [0 1.5]
                             :y-bounds-data [0 0.7]
                             :y-bounds-agree [0 300]
                             :event-rows 6)
        (.display)))

## Timing Evaluation

In [None]:
;; Timing evaluation over each distinct experiment label in the summary.
;; NOTE(review): the meaning of the trailing 1000 is not visible here -- confirm
;; against get-timing-evaluation in jupyter_helpers.clj.
(def timing-evaluation
    (let [unique-labels (->> experiments-summary
                             (map :label)
                             distinct)] ;; Drop repeated experiment labels
        (get-timing-evaluation output-dir unique-labels 1000)))

In [None]:
;; Save the timing evaluation under output-dir.
;; NOTE(review): the file name begins with a literal ':' (":timing-summary.edn"), which looks like a
;; stringified keyword and is not a valid file name on Windows -- confirm it matches what load-data expects.
(save-data (str output-dir "/:timing-summary.edn") timing-evaluation)

In [None]:
;; Pass the timing evaluation to timing-table as the cell's result.
(timing-table timing-evaluation)