In [None]:
%%bash
# Build the project's standalone uberjar with Leiningen.
lein uberjar

In [None]:
%classpath add jar ../target/jvm/uberjar/hdsm-0.1.0-SNAPSHOT-standalone.jar
(clojure.lang.Compiler/loadFile "jupyter_helpers.clj")
(require '[clojure.java.io :as io]
         '[clojure.string :as string]
         '[clojure.pprint :refer [pprint print-table]]
         '[jupyter-helpers :refer [run-and-save-experiments load-experiment site-summary-table experiment-timeline experiments-summary-table
                                   display-html confusion-summary display-experiment-models get-best-experiment-label load-experiment-pair
                                   get-timing-evaluation timing-table get-experiment-block-accuracies experiment-block-accuracies-table
                                   save-data load-data]]
         '[hdsm.classifier.distributed.sites :refer [make-site-structure p-site t-site]]
         '[hdsm.classifier.moa-classifier :refer [adaptive-random-forest naive-bayes]]
         '[hdsm.dataset.csv-dataset :refer [read-csv-dataset]]
         '[hdsm.utils.stats :refer [mann-whitney-u-test wilcoxon-signed-rank-test]])

## Experiment Setup

In [None]:
;; Dataset Configuration

(defn dataset-fn
  "Reads the gesture-master CSV through `read-csv-dataset`, passing a record
  limit of 999999 (meant to cover all records) and the eight channel columns
  as the numeric features (every column except the class)."
  []
  (let [csv-path         "datasets/gesture_master/gesture-master.csv"
        record-limit     999999 ;; All records
        ;; Numeric features (all excluding class)
        numeric-features ["channel1" "channel2" "channel3" "channel4"
                          "channel5" "channel6" "channel7" "channel8"]]
    (read-csv-dataset csv-path record-limit numeric-features)))
;; Dataset name; reused in experiment labels and in the output directory path.
(def dataset-name "gesture-master")
;; Number of input features, not including the class.
(def feature-count 8)
;; Base multiplier for the trouble factors below.
;; NOTE(review): presumably the number of features held by each p-site - confirm.
(def features-per-site 1)

;; Trouble factors to sweep: 1x, 1.5x and 2x `features-per-site`.
(def trouble-factors
  (for [multiplier [1 1.5 2]]
    (* features-per-site multiplier)))
;; One p-site per channel: the site labelled :channelN is created with the
;; one-element vector [N-1].
;; NOTE(review): the vector is presumably the site's feature indices - confirm
;; against `p-site`.
(def p-sites
  (vec (map-indexed (fn [feature-index site-label]
                      (p-site site-label [feature-index]))
                    [:channel1 :channel2 :channel3 :channel4
                     :channel5 :channel6 :channel7 :channel8])))


;; Load the dataset once, using the CSV-reading `dataset-fn` defined earlier
;; in this cell.
(def dataset (dataset-fn))
;; Rebind `dataset-fn` so it returns the already-loaded dataset instead of
;; reading the CSV again. Order matters: this redefinition must come after
;; the `def` above, which still needs the CSV-reading version.
(defn dataset-fn [] dataset)

In [None]:
;; Experiment Configuration

;; Classifier passed to `run-and-save-experiments` for the main experiment run.
(def base-classifier naive-bayes)
;; Classifier stored under :trouble-classifier in the base setup, used as the
;; :classifier-generator of the stacked aggregation rule, and passed to the
;; centralised run.
(def trouble-classifier adaptive-random-forest)

;; Settings shared by every experiment. Each experiment `assoc`s its own
;; :label, :p-site-aggregation-rule and :system-config on top of this map
;; (some also replace :base-site-structure).
(def base-setup
  (let [primary-site-structure (apply make-site-structure feature-count p-sites)]
    {:dataset-description     (keyword dataset-name)
     :dataset-fn              dataset-fn
     :trouble-classifier      trouble-classifier
     :base-site-structure     primary-site-structure
     ;; Disable monitor logging because this is a long-running experiment
     ;; where detailed logging data is excessively large.
     :disable-monitor-logging true
     :batch1                  10000}))

;; Aggregation rules under test, keyed by the name that is embedded in the
;; experiment labels. The experiment lists iterate this map, so the entry
;; order here is the order in which experiments are generated.
(def aggregation-rules
  (let [stacked-rule {:type :two-level-stacked
                      :classifier-generator trouble-classifier}]
    {:max-conf      {:type :max-conf}
     :simple-voting {:type :simple-voting}
     :stacked       stacked-rule}))

;; Baseline :system-config map. The Park, HDSM and centralised experiments
;; each `assoc` a :trouble-factor onto it; the Park and centralised
;; experiments additionally replace the creation/removal thresholds below
;; with fixed numeric values.
(def base-system-config
    {:site-window-size 1000
     :site-training-time 0
     :shared-sources? false
     :creation-window-size 1000
     :creation-time-threshold 500
     :removal-window-size 1000
     :removal-time-threshold 500
     ;; Threshold given as a map describing a smoothed Hoeffding bound.
     :creation-agreement-threshold {:type :smoothed-hoeffding-bound
                                    :r 1 :delta 0.001 :sharpness 5}
     ;; Threshold given as a map describing a (plain) Hoeffding bound.
     :removal-accuracy-threshold {:type :hoeffding-bound
                                  :r 1 :delta 0.001}
     :removal-usage-threshold 0.05})


;; One naive experiment per aggregation rule: the shared base setup with
;; :system-config set to :naive and the rule's key appended to the label.
(def naive-experiments
  (map (fn [[rule-key rule]]
         (let [label (keyword (str dataset-name "-naive" rule-key))]
           (assoc base-setup
                  :label label
                  :p-site-aggregation-rule rule
                  :system-config :naive)))
       aggregation-rules))

;; One "park" experiment per trouble factor. The site structure is the
;; p-sites plus a single :trouble t-site built over all p-site labels;
;; monitors are disabled and max-conf aggregation is used. The creation and
;; removal thresholds are replaced with the constants 999 / -999.
;; NOTE(review): presumably so that no site is ever created or removed at
;; runtime - confirm.
(def park-experiments
  (map (fn [tf]
         (let [trouble-site   (t-site :trouble (map :label p-sites) tf)
               site-structure (apply make-site-structure feature-count
                                     (concat p-sites [trouble-site]))
               system-config  (assoc base-system-config
                                     :trouble-factor tf
                                     :creation-agreement-threshold 999
                                     :removal-accuracy-threshold -999
                                     :removal-usage-threshold -999)]
           (assoc base-setup
                  :label (keyword (str dataset-name "-park"
                                       "-tf" tf))
                  :base-site-structure site-structure
                  :disable-monitors true
                  :p-site-aggregation-rule {:type :max-conf}
                  :system-config system-config)))
       trouble-factors))

;; One HDSM experiment for every (trouble factor, aggregation rule) pair,
;; with the trouble factor as the outer loop. Each uses the base system
;; config with only :trouble-factor added.
(def hdsm-experiments
  (for [tf              trouble-factors
        [rule-key rule] aggregation-rules
        :let [label         (keyword (str dataset-name "-hdsm"
                                          rule-key
                                          "-tf" tf))
              system-config (assoc base-system-config
                                   :trouble-factor tf)]]
    (assoc base-setup
           :label label
           :p-site-aggregation-rule rule
           :system-config system-config)))

;; Every experiment to run, in order: naive, then Park, then HDSM.
(def experiments
  (concat naive-experiments park-experiments hdsm-experiments))

;; Directory handed to the run/load helpers for this dataset's results.
(def output-dir
  (str "workspace/" dataset-name))

## Run Experiments

In [None]:
;; Create a BeakerX Output widget; it is passed to the experiment runner as
;; :beaker-output in the next cell.
(import '[com.twosigma.beakerx.widget Output])
(def experiment-output (Output.))
;; Evaluate the widget as the cell's last expression so the notebook shows it.
experiment-output

In [None]:
;; Run every configured experiment with the base classifier, using
;; `output-dir`, the Output widget created above, and a single thread.
(run-and-save-experiments output-dir base-classifier experiments
                          :beaker-output experiment-output
                          :thread-count 1)

## Experiments Summary

In [None]:
;; Load the entry keyed :summary from the output directory and render it
;; as a table.
(def experiments-summary
    (load-experiment output-dir :summary))

(experiments-summary-table experiments-summary)

## Accuracy and Transmission Over Time 

In [None]:
;; Key path passed to `load-experiment-pair` when loading the experiments
;; plotted in the sections below.
;; NOTE(review): presumably selects accuracy measured after skipping the
;; first 1000 records - confirm against the helper.
(def performance-measure [:skip-1000 :accuracy])
;; Passed as :partition-size to `experiment-timeline` in the plots below.
(def partition-size 100)

### Maximum Confidence Aggregation

In [None]:
;; Experiments loaded for the :max-conf aggregation rule; the next cell reads
;; the :hdsm entry and plots it against the remaining entries.
(def max-conf-experiments (load-experiment-pair output-dir experiments-summary performance-measure :max-conf))

In [None]:
;; Timeline for max-conf aggregation: the :hdsm experiment is plotted against
;; every other entry of the loaded map, with the x axis spanning all of the
;; HDSM results.
(let [hdsm-run        (:hdsm max-conf-experiments)
      comparison-runs (dissoc max-conf-experiments :hdsm)
      result-count    (count (:results hdsm-run))
      timeline        (experiment-timeline hdsm-run
                                           comparison-runs
                                           :partition-size partition-size
                                           :x-bounds [0 result-count]
                                           :y-bounds-acc [0 1.2]
                                           :y-bounds-data [0 1.5]
                                           :event-rows 6)]
  (.display timeline))

### Simple Voting Aggregation

In [None]:
;; Experiments loaded for the :simple-voting aggregation rule; the next cell
;; reads the :hdsm entry and plots it against the remaining entries.
(def voting-experiments (load-experiment-pair output-dir experiments-summary performance-measure :simple-voting))

In [None]:
;; Timeline for simple-voting aggregation: the :hdsm experiment is plotted
;; against every other entry of the loaded map, with the x axis spanning all
;; of the HDSM results.
(let [hdsm-run        (:hdsm voting-experiments)
      comparison-runs (dissoc voting-experiments :hdsm)
      result-count    (count (:results hdsm-run))
      timeline        (experiment-timeline hdsm-run
                                           comparison-runs
                                           :partition-size partition-size
                                           :x-bounds [0 result-count]
                                           :y-bounds-acc [0 1.5]
                                           :y-bounds-data [0 1.5]
                                           :event-rows 6)]
  (.display timeline))

### Stacked Aggregation

In [None]:
;; Experiments loaded for the :stacked aggregation rule; the next cell reads
;; the :hdsm entry and plots it against the remaining entries.
(def stacked-experiments (load-experiment-pair output-dir experiments-summary performance-measure :stacked))

In [None]:
;; Timeline for stacked aggregation: the :hdsm experiment is plotted against
;; every other entry of the loaded map, with the x axis spanning all of the
;; HDSM results.
(let [hdsm-run        (:hdsm stacked-experiments)
      comparison-runs (dissoc stacked-experiments :hdsm)
      result-count    (count (:results hdsm-run))
      timeline        (experiment-timeline hdsm-run
                                           comparison-runs
                                           :partition-size partition-size
                                           :x-bounds [0 result-count]
                                           :y-bounds-acc [0.5 1.5]
                                           :y-bounds-data [0 0.7]
                                           :event-rows 6)]
  (.display timeline))

## Timing Evaluation

In [None]:
;; Timing evaluation over each distinct experiment label in the summary.
;; NOTE(review): the meaning of the trailing 1000 is defined by
;; `get-timing-evaluation` - confirm.
(def timing-evaluation
  (let [experiment-labels (distinct (map :label experiments-summary))] ;; Drop repeated experiment labels
    (get-timing-evaluation output-dir experiment-labels 1000)))

In [None]:
;; Persist the timing evaluation under the output directory. The file name
;; really does start with a colon (":timing-summary.edn").
(save-data (str output-dir "/:timing-summary.edn") timing-evaluation)

In [None]:
;; Render the timing evaluation as a table.
(timing-table timing-evaluation)

## Centralised Accuracy

In [None]:
;; Results of the centralised experiment go in a "central" subdirectory of
;; the main output directory.
(def central-output-dir (str output-dir "/central"))

In [None]:
;; Trouble factor for the centralised experiment: twice the feature count.
;; NOTE(review): presumably chosen large enough that the :central t-site
;; covers every feature - confirm.
(def centralise-all-trouble-factor (* 2 feature-count))

;; Centralised experiment: the p-sites plus a single :central t-site built
;; over all p-site labels with the centralise-all trouble factor. Monitors
;; are disabled, max-conf aggregation is used, and the creation/removal
;; thresholds are replaced with the constants 999 / -999.
(def centralised-experiment
  (let [tf             centralise-all-trouble-factor
        central-site   (t-site :central (map :label p-sites) tf)
        site-structure (apply make-site-structure feature-count
                              (concat p-sites [central-site]))
        system-config  (assoc base-system-config
                              :trouble-factor tf
                              :creation-agreement-threshold 999
                              :removal-accuracy-threshold -999
                              :removal-usage-threshold -999)]
    (assoc base-setup
           :label (keyword (str dataset-name "-centralised"))
           :base-site-structure site-structure
           :p-site-aggregation-rule {:type :max-conf}
           :disable-monitors true
           :system-config system-config)))

;; Run only the centralised experiment, saving under `central-output-dir`.
;; Note that `trouble-classifier` is passed in the argument position where
;; the main run passes `base-classifier`.
(run-and-save-experiments central-output-dir trouble-classifier [centralised-experiment]
                          :thread-count 1)

In [None]:
;; Load the entry keyed :summary from the centralised output directory and
;; render it as a table.
(def central-experiments-summary
    (load-experiment central-output-dir :summary))
    
(experiments-summary-table central-experiments-summary)