Skip to content
This repository has been archived by the owner on May 7, 2020. It is now read-only.

Commit

Permalink
add code to use opendata
Browse files Browse the repository at this point in the history
... with all test set labels. It's available at:

http://opendata.cern.ch/collection/ATLAS-Higgs-Challenge-2014
  • Loading branch information
melisgl committed Mar 23, 2015
1 parent d12c89f commit a496793
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 106 deletions.
135 changes: 79 additions & 56 deletions src/bpn.lisp
Expand Up @@ -107,7 +107,7 @@

(defclass higgs-base-trainer ()
((training :initarg :training :reader training)
(test :initarg :test :reader test)))
(tests :initarg :tests :reader tests)))

(defun log-training-period (trainer learner)
(declare (ignore learner))
Expand Down Expand Up @@ -155,10 +155,12 @@
(defun log-test-error (trainer learner)
(let* ((*random-state* (make-random-state nil))
(bpn (bpn learner))
(test (test trainer))
(tests (tests trainer))
(training (training trainer))
(training-predictions (predict-batch-with-bpn bpn training))
(test-predictions (predict-batch-with-bpn bpn test)))
(test-prediction-seqs (mapcar (lambda (test)
(predict-batch-with-bpn bpn test))
tests)))
(when (<= (* 60 (length training)) (n-instances trainer))
(let* ((extra-example-weights (extra-example-weights bpn))
(n (length training-predictions))
Expand Down Expand Up @@ -207,11 +209,16 @@
(make-cost-monitors
bpn :attributes '(:event "pred."
:dataset "train")))
(monitor-bpn-results (make-sampler test) (bpn learner)
(make-cost-monitors
(bpn learner) :attributes '(:event "pred."
:dataset "test")))))
(log-thresholds training-predictions test-predictions))
(loop for test in tests
for i upfrom 0
nconc (monitor-bpn-results
(make-sampler test) (bpn learner)
(make-cost-monitors
(bpn learner)
:attributes `(:event "pred."
:dataset
,(format nil "test-~D" i)))))))
(log-thresholds training-predictions test-prediction-seqs))
(log-msg "---------------------------------------------------~%"))


Expand Down Expand Up @@ -269,7 +276,7 @@
(second (second name))
(second name))))

(defun train-higgs-bpn-gd (bpn training test &key
(defun train-higgs-bpn-gd (bpn training tests &key
(n-epochs 200) l2-upper-bound
learning-rate learning-rate-decay
(n-epochs-to-reach-final-momentum 500)
Expand Down Expand Up @@ -315,7 +322,7 @@
(let ((trainer (monitor-optimization-periodically
(make-instance 'higgs-bpn-gd-trainer
:training training
:test test
:tests tests
:segmenter (make-segmenter #'make-trainer))
'((:fn log-test-error
:period log-test-period)
Expand Down Expand Up @@ -353,7 +360,7 @@
(build-fnn (:class 'higgs-bpn :max-n-stripes 96)
(inputs (->input :dropout nil :size *n-encoded-features*))
(f1-activations (->activation inputs :name 'f1 :size n))
(f1* (->max-channel f1-activations :group-size group-size ))
(f1* (->max-channel f1-activations :group-size group-size))
(f1 (->dropout f1* :dropout dropout))
(f2-activations (->activation f1 :name 'f2 :size n))
(f2* (->max-channel f2-activations :group-size group-size))
Expand All @@ -370,12 +377,12 @@
(make-encoder training :transformers (make-transformers)))
bpn))

(defun train-higgs/4 (&key training test quick-run-p bpn-var bpn-filename)
(defun train-higgs/4 (&key training tests quick-run-p bpn-var bpn-filename)
(repeatably ()
(let ((bpn nil))
(setq* (bpn bpn-var) (make-higgs-bpn training))
(init-bpn-weights bpn :stddev 0.01)
(train-higgs-bpn-gd bpn training test
(train-higgs-bpn-gd bpn training tests
:n-epochs (if quick-run-p 2 200)
:n-epochs-to-reach-final-momentum 100
:learning-rate 1
Expand All @@ -389,24 +396,23 @@
(defvar *bpn/4*)

(defun run-quick-test ()
(train-higgs/4 :training (training-examples) :test (test-examples)
(train-higgs/4 :training (training-examples) :tests (list (test-examples))
:quick-run-p t))


;;;; Cross-validation

(defun train-4 (&key training test filename)
(let* ((bpn (train-higgs/4 :training training :test test
:bpn-var '*bpn/4*
:bpn-filename filename))
(test-predictions (predict-batch-with-bpn bpn test)))
(values bpn test-predictions)))
(defun train-4 (&key training tests filename)
  "Train on TRAINING by delegating to TRAIN-HIGGS/4.
TESTS is handed through unchanged as :TESTS, the symbol *BPN/4* is
passed as :BPN-VAR and FILENAME as :BPN-FILENAME.  Returns whatever
TRAIN-HIGGS/4 returns."
  (train-higgs/4 :bpn-var '*bpn/4*
                 :bpn-filename filename
                 :training training
                 :tests tests))

(defun run-cv-bagging (fn &key (save-dir *model-dir*)
(training (training-examples))
(test (test-examples))
(n-folds 2)
n-iterations)
#+nil
(assert (or (not (uiop/filesystem:directory-exists-p save-dir))
(endp (directory (merge-pathnames "*" save-dir)))))
(ensure-directories-exist save-dir)
Expand All @@ -419,40 +425,56 @@
(describe-examples "in-bag" in-bag)
(describe-examples "out-of-bag" out-of-bag)
(sb-ext:gc :full t)
(save-training in-bag (merge-pathnames
(format nil "bag-~S-training.csv"
bag-index)
save-dir))
(multiple-value-bind (bpn out-of-bag-predictions)
(let ((*experiment-random-seed*
(+ *experiment-random-seed* bag-index)))
(funcall fn :training in-bag
:test out-of-bag
:filename (merge-pathnames
(format nil "bag-~S-model-bpn"
bag-index)
save-dir)))
(save-predictions
out-of-bag-predictions
(merge-pathnames
(format nil "bag-~S-out-of-bag-predictions-bpn" bag-index)
save-dir))
(when test
(save-predictions
(predict-batch-with-bpn bpn test)
(merge-pathnames
(format nil "bag-~S-test-predictions-bpn" bag-index)
save-dir)))
(log-msg "Finished bag ~S~%" bag-index)
(let ((predictions
(average-overlapping-predictions
(mapcar
#'load-predictions
(directory (merge-pathnames
"bag-*-out-of-bag-predictions-bpn"
save-dir))))))
(log-msg "Test results with ~S bags~%" (1+ bag-index))
(log-thresholds () predictions))
(let ((training-file (merge-pathnames
(format nil "bag-~S-training.csv"
bag-index)
save-dir))
(model-file (merge-pathnames
(format nil "bag-~S-model-bpn" bag-index)
save-dir))
(out-of-bag-predictions-file
(merge-pathnames
(format nil "bag-~S-out-of-bag-predictions-bpn"
bag-index)
save-dir))
(test-predictions-file
(merge-pathnames
(format nil "bag-~S-test-predictions-bpn" bag-index)
save-dir)))
(unless (probe-file model-file)
(save-training in-bag training-file :if-exists :supersede)
(let* ((bpn (let ((*experiment-random-seed*
(+ *experiment-random-seed* bag-index)))
(funcall fn :training in-bag
:tests (list out-of-bag test)
:filename model-file)))
(out-of-bag-predictions
(predict-batch-with-bpn bpn out-of-bag)))
(save-predictions
out-of-bag-predictions
out-of-bag-predictions-file)
(when test
(save-predictions
(predict-batch-with-bpn bpn test)
test-predictions-file))
(log-msg "Finished bag ~S~%" bag-index)
(let ((out-of-bag-predictions
(average-overlapping-predictions
(mapcar
#'load-predictions
(directory (merge-pathnames
"bag-*-out-of-bag-predictions-bpn"
save-dir)))))
(test-predictions
(average-overlapping-predictions
(mapcar
#'load-predictions
(directory (merge-pathnames
"bag-*-test-predictions-bpn"
save-dir))))))
(log-msg "Test results with ~S bags~%" (1+ bag-index))
(log-thresholds () (list out-of-bag-predictions
test-predictions)))))
(incf bag-index)
(values)))
:n-folds n-folds
Expand All @@ -476,9 +498,10 @@
(run-quick-test))
(progn
(makunbound 'higgs-boson::*event-id-to-example*)
(clrhash higgs-boson::*event-id-to-example*)
(makunbound 'higgs-boson::*training-examples*)
(makunbound 'higgs-boson::*test-examples*))
(makunbound 'higgs-boson::*test-examples*)
(makunbound 'higgs-boson::*opendata-examples*))
(let ((*experiment-random-seed* 1234)
(*default-mat-ctype* :float))
Expand Down
1 change: 1 addition & 0 deletions src/config.lisp
Expand Up @@ -31,3 +31,4 @@

;; Locations of the input CSV files, each resolved relative to *DATA-DIR*.
(defparameter *training-file* (merge-pathnames "training.csv" *data-dir*))
(defparameter *test-file* (merge-pathnames "test.csv" *data-dir*))
;; NOTE(review): presumably the ATLAS Higgs Challenge 2014 opendata
;; export -- confirm.  OPENDATA-EXAMPLES yields NIL when this is NIL.
(defparameter *opendata-file* (merge-pathnames "opendata.csv" *data-dir*))
49 changes: 45 additions & 4 deletions src/csv.lisp
Expand Up @@ -19,6 +19,21 @@
(if *test-file*
(slurp-csv *test-file*)
nil))))

;; Cache used by OPENDATA-EXAMPLES.  It has no initial value: being
;; unbound is how OPENDATA-EXAMPLES recognizes that nothing has been
;; loaded yet.
(defvar *opendata-examples*)

;; Factor by which CSV-RECORD-TO-EXAMPLE multiplies the event id parsed
;; from the first column of a record.  OPENDATA-EXAMPLES rebinds it to
;; -1 while it reads its file.
(defvar *event-id-multiplier* 1)

(defun opendata-examples ()
  "Return the opendata examples, loading them on first use.
The result is cached in *OPENDATA-EXAMPLES*; that variable being
unbound is what triggers loading.  When *OPENDATA-FILE* is NIL nothing
is read and NIL is cached."
  (unless (boundp '*opendata-examples*)
    (setq *opendata-examples*
          (when *opendata-file*
            ;; Negate the event ids while reading, to avoid collisions
            ;; with the ids from kaggle's training.csv and test.csv.
            (let ((*event-id-multiplier* -1))
              (slurp-csv *opendata-file*)))))
  *opendata-examples*)


(defun map-csv-records (fn filename &key skip-header header-p-fn)
Expand Down Expand Up @@ -75,8 +90,19 @@
(defun csv-record-to-example (record filename file-position)
(declare (ignore filename file-position))
(let* ((n (length record))
(event-id (parse-integer (first record))))
(cond ((find-example-by-event-id event-id :errorp nil))
(event-id (* (parse-integer (first record))
*event-id-multiplier*))
(example (find-example-by-event-id event-id :errorp nil)))
(cond (example
;; Fill in a weight and/or label that an already known example is
;; missing, but only when this record is long enough to carry those
;; columns (N is the record length bound by the enclosing LET*).
;; The bound has to be compared against N: a one-argument <= is
;; always true, which would let ELT index past the end of a short,
;; features-only record.
(when (and (null (example-weight example))
           (<= (+ 3 *n-features*) n))
  (setf (example-weight example)
        (parse-float (elt record (+ 1 *n-features*)))))
(when (and (null (example-label example))
           (<= (+ 3 *n-features*) n))
  (setf (example-label example)
        (parse-label (elt record (+ 2 *n-features*)))))
example)
((= n (+ 1 *n-features*))
(setf (gethash event-id *event-id-to-example*)
(make-example
Expand All @@ -90,6 +116,19 @@
(subseq record 1 (+ 1 *n-features*)))
:weight (parse-float (elt record (+ 1 *n-features*)))
:label (parse-label (elt record (+ 2 *n-features*))))))
;; opendata has KaggleSet and KaggleWeight columns as well
((= n (+ 5 *n-features*))
(let ((weight (parse-float (elt record (+ 1 *n-features*))))
(label (parse-label (elt record (+ 2 *n-features*)))))
(assert label)
(assert weight)
(setf (gethash event-id *event-id-to-example*)
(make-example
:event-id event-id
:features (parse-feature-vector
(subseq record 1 (+ 1 *n-features*)))
:weight weight
:label label))))
(t
(error "Unexpected number of columns.")))))

Expand Down Expand Up @@ -119,9 +158,11 @@
stream))

(defun save-training (training training-file
&key (features-fn #'example-features))
&key (features-fn #'example-features)
(if-exists :error))
(with-open-file (training-stream training-file :direction :output
:if-does-not-exist :create)
:if-does-not-exist :create
:if-exists if-exists)
(write-training-csv-header training-stream)
(dolist (example training)
(write-example example training-stream
Expand Down

0 comments on commit a496793

Please sign in to comment.