diff --git a/.gitignore b/.gitignore index c1cb6c9db..3775f8ce8 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,6 @@ cov/* out .idea clojush.iml -.gorilla-port \ No newline at end of file +.gorilla-port +/clojush/ +.ipynb_checkpoints/ \ No newline at end of file diff --git a/project.clj b/project.clj index cd96eed8b..ed251aafa 100644 --- a/project.clj +++ b/project.clj @@ -1,6 +1,6 @@ (defproject clojush "2.31.0-1-SNAPSHOT" :description "The Push programming language and the PushGP genetic programming - system implemented in Clojure. See http://pushlanguage.com" + system implemented in Clojure. See http://pushlanguage.com" :license {:name "Eclipse Public License" :url "http://www.eclipse.org/legal/epl-v10.html"} :dependencies [[org.clojure/clojure "1.8.0"] @@ -12,8 +12,9 @@ [clojure-csv "2.0.1"] [org.clojure/data.json "0.2.6"] [clj-random "0.1.7"] - ;; https://mvnrepository.com/artifact/org.apache.commons/commons-math3 - [org.apache.commons/commons-math3 "3.2"]] + ;; https://mvnrepository.com/artifact/org.apache.commons/commons-math3 + [org.apache.commons/commons-math3 "3.2"] + [cheshire "5.7.1"]] :plugins [[lein-codox "0.9.1"] [lein-shell "0.5.0"] [lein-gorilla "0.4.0"] @@ -53,4 +54,3 @@ ;;"-XX:+UseG1GC" ;:jvm-opts ["-Xmx12g" "-Xms12g" "-XX:+UseParallelGC"] :main clojush.core) - diff --git a/run-fly b/run-fly index 038682c32..565f1cea5 100755 --- a/run-fly +++ b/run-fly @@ -25,7 +25,6 @@ homedir="/home/${fly_user}" rundir="$homedir/runs/$label-$number" repodir="$rundir/Clojush" outputdir="$rundir/output" - ssh ${fly_user}@fly.hampshire.edu "mkdir -p $rundir" rsync \ @@ -49,7 +48,7 @@ ssh ${fly_user}@fly.hampshire.edu /opt/pixar/tractor-blade-1.7.2/tractor-spool.p --jobcwd="${repodir}" \ --priority=1 \ --range 1-${n} \ - -c "bash -c 'env JAVA_CMD=/usr/java/latest/bin/java /share/apps/bin/lein run $lein_command > $outputdir/RANGE.out 2> $outputdir/RANGE.err'" + -c "bash -c 'env JAVA_CMD=/usr/java/latest/bin/java /share/apps/bin/lein run $lein_command :label 
$label > $outputdir/RANGE.out 2> $outputdir/RANGE.err'" echo "Job ID: ${number}" diff --git a/src/clojush/args.clj b/src/clojush/args.clj index 60cc65290..b15bf951f 100644 --- a/src/clojush/args.clj +++ b/src/clojush/args.clj @@ -411,7 +411,7 @@ ;; When true, will exit the run when there is an individual with a zero-error vector ;;---------------------------------------- - ;; Arguments related to printing JSON, EDN, or CSV logs + ;; Arguments related to printing JSON, EDN, CSV, and remote recording ;;---------------------------------------- :print-csv-logs false @@ -450,6 +450,14 @@ :json-log-program-strings false ;; If true, JSON logs will include program strings for each individual. + + :record-host nil + ;; Should be in the format "hostname:port" + ;; If set, will send logs of each run to a server running on this + ;; host + :label nil + ;; If set, will send this in the configuration of the run, to the + ;; external record ))) (defn load-push-argmap diff --git a/src/clojush/core.clj b/src/clojush/core.clj index df8809418..e2bbe3302 100644 --- a/src/clojush/core.clj +++ b/src/clojush/core.clj @@ -16,6 +16,7 @@ ;; for more details. (ns clojush.core + (:require [clojush.pushgp.record :as r]) (:use [clojush.pushgp pushgp report]) (:gen-class)) @@ -30,12 +31,13 @@ This allows one to run an example with a call from the OS shell prompt like: lein run examples.simple-regression :population-size 3000" [& args] + (r/new-run!) (println "Command line args:" (apply str (interpose \space args))) (let [param-list (map #(if (.endsWith % ".ser") (str %) (read-string %)) (rest args))] - (require (symbol (first args))) + (require (symbol (r/config-data! 
[:problem-file] (first args)))) (let [example-params (eval (symbol (str (first args) "/argmap"))) params (merge example-params (apply sorted-map param-list))] (println "######################################") diff --git a/src/clojush/problems/software/replace_space_with_newline.clj b/src/clojush/problems/software/replace_space_with_newline.clj index e18aca417..69f1d595a 100644 --- a/src/clojush/problems/software/replace_space_with_newline.clj +++ b/src/clojush/problems/software/replace_space_with_newline.clj @@ -15,7 +15,8 @@ [clojush pushstate interpreter random util globals] clojush.instructions.tag clojure.math.numeric-tower) - (:require [clojure.string :as string])) + (:require [clojure.string :as string] + [clojush.pushgp.record :refer [generation-data!]])) ;; Define test cases (defn replace-space-input @@ -145,12 +146,14 @@ "Custom generational report." [best population generation error-function report-simplifications] (let [best-program (not-lazy (:program best)) - best-test-errors (error-function best-program :test) + best-test-errors (generation-data! [:best :test-errors] (error-function best-program :test)) best-total-test-error (apply +' best-test-errors)] (println ";;******************************") (printf ";; -*- Replace Space With Newline problem report - generation %s\n" generation)(flush) (println "Test total error for best:" best-total-test-error) - (println (format "Test mean error for best: %.5f" (double (/ best-total-test-error (count best-test-errors))))) + (println (format "Test mean error for best: %.5f" + (generation-data! [:best :mean-test-error] + (double (/ best-total-test-error (count best-test-errors)))))) (when (zero? 
(:total-error best)) (doseq [[i error] (map vector (range) diff --git a/src/clojush/pushgp/pushgp.clj b/src/clojush/pushgp/pushgp.clj index 1e9506cc8..8e3e36b5c 100644 --- a/src/clojush/pushgp/pushgp.clj +++ b/src/clojush/pushgp/pushgp.clj @@ -1,7 +1,8 @@ (ns clojush.pushgp.pushgp (:require [clojure.java.io :as io] [clj-random.core :as random] - [clojure.repl :as repl]) + [clojure.repl :as repl] + [clojush.pushgp.record :as r]) (:use [clojush args globals util pushstate random individual evaluate simplification translate] [clojush.instructions boolean code common numbers random-instructions string char vectors tag zip return input-output genome] @@ -144,13 +145,18 @@ ([args] (reset! timer-atom (System/currentTimeMillis)) (load-push-argmap args) + (when (some? (:record-host @push-argmap)) + (r/host! (str (:record-host @push-argmap)))) (random/with-rng (random/make-mersennetwister-rng (:random-seed @push-argmap)) ;; set globals from parameters (reset-globals) (initial-report @push-argmap) ;; Print the inital report - (print-params @push-argmap) + (r/uuid! (:run-uuid @push-argmap)) + (print-params (r/config-data! [:argmap] (dissoc @push-argmap :run-uuid))) (check-genetic-operator-probabilities-add-to-one @push-argmap) (timer @push-argmap :initialization) + (when (:print-timings @push-argmap) + (r/config-data! [:initialization-ms] (:initialization @timing-map))) (println "\n;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") (println "\nGenerating initial population...") (flush) (let [pop-agents (make-pop-agents @push-argmap) @@ -161,6 +167,7 @@ ;(println) ;; Main loop (loop [generation 0] + (r/new-generation! generation) (println "Processing generation:" generation) (flush) (population-translate-plush-to-push pop-agents @push-argmap) (timer @push-argmap :reproduction) @@ -183,6 +190,8 @@ ;; report and check for success (let [[outcome best] (report-and-check-for-success (vec (doall (map deref pop-agents))) generation @push-argmap)] + (r/generation-data! 
[:outcome] outcome) + (r/end-generation!) (cond (= outcome :failure) (do (printf "\nFAILURE\n") (if (:return-simplified-on-failure @push-argmap) (auto-simplify best diff --git a/src/clojush/pushgp/record.clj b/src/clojush/pushgp/record.clj new file mode 100644 index 000000000..d168c4090 --- /dev/null +++ b/src/clojush/pushgp/record.clj @@ -0,0 +1,114 @@ +;;; Records the results of runs to an external server + +;; Use documented in https://push-language.hampshire.edu/t/recording-and-analyzing-experimental-results/830 + +;; If `record-host` is set in the arguments, then we should send +;; data about each run, as it progresses, to that host for archival +;; and monitoring purposes. + +;; The functions in this file are stateful and should be called in this order: +;; +;; (new-run! uuid! config-data!* (new-generation! generation-data!* end-generation!)*)* +;; +;; Currently it doesn't enforce this and if you call a method when you shouldn't +;; the results are unknown. +;; Also it will not send anything over the network until `host!` is called, +;; before that, `end-generation!` will be a no-op. + +(ns clojush.pushgp.record + (:require [clojure.java.io] + [cheshire.core] + [cheshire.generate] + [clojure.string])) + +;; write functions as strings +(cheshire.generate/add-encoder + clojure.lang.AFunction + cheshire.generate/encode-str) + +(def hostname-and-port (atom nil)) +(def writer (atom nil)) + +(defn- ->writer + ; https://github.com/clojure-cookbook/clojure-cookbook/blob/master/05_network-io/5-09_tcp-client.asciidoc + [] + (let [[hostname port] @hostname-and-port] + (-> (java.net.Socket. hostname port) + clojure.java.io/writer))) + +(defn- set-writer! + ; Tries to get a writer to send data on, and if it fails, retries every + ; 5 seconds + [] + (println "Trying to connect to external server for recording at " @hostname-and-port "...") + (try + (reset! writer (->writer)) + (catch java.net.ConnectException _ + (Thread/sleep 5000) + (set-writer!)))) + +(defn host! 
[host-str] + (let [[hostname port-str] (clojure.string/split host-str #":")] + (reset! hostname-and-port [hostname (int (bigint port-str))]) + (set-writer!))) + +(defn- write-data! [data] + (when (some? @hostname-and-port) + (println "Trying to record data to external server...") + (try + (do + (cheshire.core/generate-stream data @writer) + (.newLine @writer) + (.flush @writer)) + (catch java.net.SocketException _ + (set-writer!) + (write-data! data))))) + +(def data (atom {})) + + +;; Stores a configuration option for the run, for the sequence of `ks` and value `v` +;; e.g. (config-data! [:git-uuid] "abc-def") +(defn config-data! [ks v] + (swap! data assoc-in (cons :config ks) v) + v) + +(defn seconds-since-epoch + ;; http://stackoverflow.com/a/17432411 + ;; because Spark interprets numbers as dates in this format when in JSON + [] + (quot (System/currentTimeMillis) 1000)) + +;; Resets the run data and saves the start time. Should be called at the +;; beginning of a run +(defn new-run! [] + (reset! data {:config {:start-time (seconds-since-epoch)}})) + +(defn uuid! [uuid] + (swap! data assoc :uuid uuid)) + +;; Resets the generation data and should be called at the beginning of +;; each generation +(defn new-generation! [index] + (swap! + data + assoc + :index index + :generation {:start-time (seconds-since-epoch)})) + + +;; Stores data about the generation, e.g. +;; (generation-data! [:best :error] [1 2 3 10]) +(defn generation-data! [ks v] + (swap! data assoc-in (cons :generation ks) v) + v) + +;; Sends the data for the current generation over the network to be recorded +;; Also sends the configuration with each generation +(defn end-generation! [] + (let [{:keys [generation uuid index config]} @data] + (write-data! 
+ (assoc generation + :config-uuid uuid + :index index + :config config)))) diff --git a/src/clojush/pushgp/report.clj b/src/clojush/pushgp/report.clj index e489b093c..8b7bb8245 100644 --- a/src/clojush/pushgp/report.clj +++ b/src/clojush/pushgp/report.clj @@ -6,7 +6,8 @@ [clj-random.core :as random] [local-file] [clojure.data.csv :as csv] - [clojure.java.io :as io])) + [clojure.java.io :as io] + [clojush.pushgp.record :as r])) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; helper functions @@ -308,6 +309,7 @@ print-edn-logs edn-keys edn-log-filename edn-additional-keys] :as argmap}] + (r/generation-data! [:population] population) (println) (println ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") (println ";; -*- Report at generation" generation) @@ -352,6 +354,7 @@ (lexicase-report population argmap)) (when (= total-error-method :ifs) (implicit-fitness-sharing-report population argmap)) (println (format "--- Best Program (%s) Statistics ---" (str "based on " (name err-fn)))) + (r/generation-data! [:best :individual] best) (println "Best genome:" (print-genome best)) (println "Best program:" (pr-str (not-lazy (:program best)))) (when (> report-simplifications 0) @@ -365,8 +368,10 @@ (when (and print-errors (not (empty? meta-error-categories))) (println "Meta-Errors:" (not-lazy (:meta-errors best)))) (println "Total:" (:total-error best)) - (println "Mean:" (float (/ (:total-error best) - (count (:errors best))))) + (let [mean (r/generation-data! 
[:best :mean-error] (float (/ (:total-error best) + (count (:errors best)))))] + + (println "Mean:" mean)) (when (not= normalization :none) (println "Normalized error:" (:normalized-error best))) (case total-error-method @@ -375,11 +380,12 @@ :ifs (println "IFS-error:" (:weighted-error best)) nil) (when print-history (println "History:" (not-lazy (:history best)))) - (println "Genome size:" (count (:genome best))) - (println "Size:" (count-points (:program best))) - (printf "Percent parens: %.3f\n" - (double (/ (count-parens (:program best)) - (count-points (:program best))))) ;Number of (open) parens / points + (println "Genome size:" (r/generation-data! [:best :genome-size] (count (:genome best)))) + (println "Size:" (r/generation-data! [:best :program-size] (count-points (:program best)))) + (printf "Percent parens: %.3f\n" + (r/generation-data! [:best :percent-parens] + (double (/ (count-parens (:program best)) + (count-points (:program best)))))) ;Number of (open) parens / points (println "--- Population Statistics ---") (when print-cosmos-data (println "Cosmos Data:" (let [quants (config/quantiles (count population))] @@ -514,7 +520,8 @@ "Prints the initial report of a PushGP run." [{:keys [problem-specific-initial-report] :as push-argmap}] (problem-specific-initial-report push-argmap) - (println "Registered instructions:" @registered-instructions) + (println "Registered instructions:" + (r/config-data! [:registered-instructions] @registered-instructions)) (println "Starting PushGP run.") (printf "Clojush version = ") (try @@ -524,7 +531,7 @@ version-number (.substring version-str 1 (count version-str))] (if (empty? version-number) (throw Exception) - (printf (str version-number "\n")))) + (printf (str (r/config-data! [:version-number] version-number) "\n")))) (flush) (catch Exception e (printf "version number unavailable\n") @@ -538,6 +545,7 @@ ;; been committed already. ;; - GitHub link will only work if commit has been pushed ;; to GitHub. + (r/config-data! 
[:git-hash] git-hash) (printf (str "Hash of last Git commit = " git-hash "\n")) (printf (str "GitHub link = https://github.com/lspector/Clojush/commit/" git-hash