From 638bf0a1bdb8d98a856d64bc4ddea0d7b9bf82d3 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 3 Jul 2017 18:41:56 -0400 Subject: [PATCH] Refactor reporting --- project.clj | 3 +- src/clojush/args.clj | 15 +- src/clojush/core.clj | 15 +- src/clojush/log.clj | 19 + src/clojush/log/events.clj | 9 + src/clojush/log/events/config.clj | 45 ++ src/clojush/log/events/generation.clj | 249 +++++++ .../log/events/generation/individual.clj | 91 +++ .../log/events/generation/lexicase.clj | 57 ++ src/clojush/log/events/init.clj | 24 + src/clojush/log/handlers.clj | 13 + src/clojush/log/handlers/csv.clj | 47 ++ src/clojush/log/handlers/edn.clj | 35 + src/clojush/log/handlers/json.clj | 37 ++ src/clojush/log/handlers/remote.clj | 70 ++ src/clojush/log/handlers/text.clj | 252 +++++++ src/clojush/pushgp/pushgp.clj | 112 ++-- src/clojush/pushgp/record.clj | 114 ---- src/clojush/pushgp/report.clj | 615 ------------------ src/clojush/structured_logger.clj | 57 ++ test/clojush/test/structured_logger_test.clj | 45 ++ 21 files changed, 1119 insertions(+), 805 deletions(-) create mode 100644 src/clojush/log.clj create mode 100644 src/clojush/log/events.clj create mode 100644 src/clojush/log/events/config.clj create mode 100644 src/clojush/log/events/generation.clj create mode 100644 src/clojush/log/events/generation/individual.clj create mode 100644 src/clojush/log/events/generation/lexicase.clj create mode 100644 src/clojush/log/events/init.clj create mode 100644 src/clojush/log/handlers.clj create mode 100644 src/clojush/log/handlers/csv.clj create mode 100644 src/clojush/log/handlers/edn.clj create mode 100644 src/clojush/log/handlers/json.clj create mode 100644 src/clojush/log/handlers/remote.clj create mode 100644 src/clojush/log/handlers/text.clj delete mode 100644 src/clojush/pushgp/record.clj delete mode 100644 src/clojush/pushgp/report.clj create mode 100644 src/clojush/structured_logger.clj create mode 100644 test/clojush/test/structured_logger_test.clj diff --git a/project.clj b/project.clj index 6ef8c4a6a..cc65feba1 100644 --- a/project.clj +++ b/project.clj @@ -14,7 +14,8 @@ [clj-random "0.1.7"] ;; https://mvnrepository.com/artifact/org.apache.commons/commons-math3 [org.apache.commons/commons-math3 "3.2"] - [cheshire "5.7.1"]] + [cheshire "5.7.1"] + [prismatic/plumbing "0.5.4"]] :plugins [[lein-codox "0.9.1"] [lein-shell "0.5.0"] [lein-gorilla "0.4.0"] diff --git a/src/clojush/args.clj b/src/clojush/args.clj index 1074ddf42..be4948681 100644 --- a/src/clojush/args.clj +++ b/src/clojush/args.clj @@ -1,8 +1,7 @@ (ns clojush.args (:require [clj-random.core :as random]) (:use [clojush globals random util pushstate] - [clojush.instructions.tag] - [clojush.pushgp report])) + [clojush.instructions.tag])) (def push-argmap (atom (sorted-map @@ -89,8 +88,8 @@ :uniform-addition 0.0 :uniform-addition-and-deletion 0.0 :uniform-combination-and-deletion 0.0 - :genesis 0.0 - } + :genesis 0.0} + ;; The map supplied to :genetic-operator-probabilities should contain genetic operators ;; that sum to 1.0. All available genetic operators are defined in clojush.pushgp.breed. ;; Along with single operators, pipelines (vectors) containing multiple operators are @@ -376,11 +375,11 @@ ;; The number of simplification steps that will happen during final report ;; simplifications. - :problem-specific-initial-report default-problem-specific-initial-report + :problem-specific-initial-report (fn [argmap] :no-problem-specific-initial-report-function-defined) ;; A function can be called to provide a problem-specific initial report, which happens ;; before the normal initial report is printed. - :problem-specific-report default-problem-specific-report + :problem-specific-report (fn [& args] :no-problem-specific-report-function-defined) ;; A function can be called to provide a problem-specific report, which happens before ;; the normal generational report is printed. @@ -462,10 +461,10 @@ ;; Should be in the format ":" ;; If set, will send logs of each run to a server running on this ;; host - :label nil + :label nil))) ;; If set, will send this in the configuration of the run, to the ;; external record - ))) + (defn load-push-argmap [argmap] diff --git a/src/clojush/core.clj b/src/clojush/core.clj index e2bbe3302..ffc57cb9e 100644 --- a/src/clojush/core.clj +++ b/src/clojush/core.clj @@ -16,14 +16,14 @@ ;; for more details. (ns clojush.core - (:require [clojush.pushgp.record :as r]) - (:use [clojush.pushgp pushgp report]) + (:require [clojush.log :refer [log!]]) + (:use [clojush.pushgp pushgp]) (:gen-class)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; main function -(defn -main +(defn -main "A main function for Clojush, which assumes that the first argument is the name of a problem file that contains an argmap of arguments to PushGP. Exits after completion of the call. @@ -31,18 +31,13 @@ This allows one to run an example with a call from the OS shell prompt like: lein run examples.simple-regression :population-size 3000" [& args] - (r/new-run!) - (println "Command line args:" (apply str (interpose \space args))) (let [param-list (map #(if (.endsWith % ".ser") (str %) (read-string %)) (rest args))] - (require (symbol (r/config-data! [:problem-file] (first args)))) + (require (symbol (first args))) (let [example-params (eval (symbol (str (first args) "/argmap"))) params (merge example-params (apply sorted-map param-list))] - (println "######################################") - (println "Parameters set at command line or in problem file argmap; may or may not be default:") - (print-params (into (sorted-map) params)) - (println "######################################") + (log! :init {:args args :params params}) (pushgp params) (System/exit 0)))) diff --git a/src/clojush/log.clj b/src/clojush/log.clj new file mode 100644 index 000000000..490612091 --- /dev/null +++ b/src/clojush/log.clj @@ -0,0 +1,19 @@ +(ns clojush.log + (:require [clojush.structured-logger :refer [->structured-logger]] + [clojush.log.events :refer [label->compute-graph]] + [clojush.log.handlers :refer [handlers]])) +;; this stuff is spread out over many files in subdirectories +;; Not only does this help with keeping files shorter, but it also allows +;; us to use the defnk names directly in the graph, because we don't +;; have to namespace them. For example, if we define `best` in the +;; lexicase file, then we can just throw that in the graph we defined +;; there, and it will registered under :best. + +(def structured-logger + (->structured-logger + {:handlers handlers + :label->compute-graph label->compute-graph})) + +(def log! (:log! structured-logger)) + +(def label->computed (:label->computed structured-logger)) diff --git a/src/clojush/log/events.clj b/src/clojush/log/events.clj new file mode 100644 index 000000000..282790622 --- /dev/null +++ b/src/clojush/log/events.clj @@ -0,0 +1,9 @@ +(ns clojush.log.events + (:require [clojush.log.events.init] + [clojush.log.events.config] + [clojush.log.events.generation])) + +(def label->compute-graph + {:init clojush.log.events.init/compute-graph + :config clojush.log.events.config/compute-graph + :generation clojush.log.events.generation/compute-graph}) diff --git a/src/clojush/log/events/config.clj b/src/clojush/log/events/config.clj new file mode 100644 index 000000000..4b6bb823a --- /dev/null +++ b/src/clojush/log/events/config.clj @@ -0,0 +1,45 @@ +(ns clojush.log.events.config + (:require [plumbing.core :refer [defnk]] + [plumbing.graph :as graph] + [clj-random.core :as random] + [local-file] + [clojure.string :as string])) + +(defnk clojush-version [] + (let [version-str (apply str (butlast (re-find #"\".*\"" + (first (string/split-lines + (local-file/slurp* "project.clj"))))))] + (.substring version-str 1 (count version-str)))) + +(defnk argmap [argmap-input] + argmap-input) + +(defnk registered-instructions [registered-instructions-input] + registered-instructions-input) + +(defnk argmap-with-random-str [argmap-input] + (update argmap :random-seed random/seed-to-string)) + +(defnk git-hash [] + (let [dir (local-file/project-dir)] + (string/trim + (slurp + (str dir + "/.git/" + (subs + (string/trim + (slurp + (str dir "/.git/HEAD"))) + 5)))))) + +(defnk initialization-ms [timing-map] + (:initialization timing-map)) + +(def compute-graph + (graph/graph + clojush-version + argmap + registered-instructions + git-hash + initialization-ms + argmap-with-random-str)) diff --git a/src/clojush/log/events/generation.clj b/src/clojush/log/events/generation.clj new file mode 100644 index 000000000..2d97c99ee --- /dev/null +++ b/src/clojush/log/events/generation.clj @@ -0,0 +1,249 @@ +(ns clojush.log.events.generation + (:require [plumbing.graph :as graph] + [plumbing.core :refer [defnk fnk map-vals]] + [cosmos.config :as config] + [clj-random.core :as random] + + [clojush.log.events.generation.individual :refer [->individual]] + [clojush.log.events.generation.lexicase :as lexicase] + [clojush.util :as util])) + +(defnk input-population [pop-agents] + (doall (map deref pop-agents))) + +(defnk population-errors [input-population] + (doall (map :errors input-population))) + +(defnk min-error-by-case [population-errors] + (doall (apply map + (fn [& args] (apply min args)) + population-errors))) + +;; we want to dynamically calculate additional properties on each individual, so we only calculate things like +;; % perenthesis or mean error once for each individual, regardless of where we are using that information +(defnk population [input-population min-error-by-case [:config argmap]] + ;; include the argmap and min-error-by-case in each individual's graph + ;; so that we can calculate the n-elite-cases + (->> input-population + (map (fn [ind] + (merge + (->individual {:individual ind + :argmap argmap + :min-error-by-case min-error-by-case}) + ind))) + doall)) + +(defnk error-frequencies-by-case [population] + (doall (map frequencies (apply map vector (map :errors population))))) + +(defnk ifs-best [population] + (apply min-key :weighted-error population)) + +(defnk err-fn [[:config [:argmap total-error-method]]] + (if (= total-error-method :rmse) :weighted-error :total-error)) + +(defnk sorted [err-fn population [:config [:argmap total-error-method]]] + (let [err-fn (if (= total-error-method :rmse) :weighted-error :total-error)] + (sort-by err-fn < population))) + +(defnk err-fn-best [sorted] + (first sorted)) + +(defnk psr-best + [[:config [:argmap error-function report-simplifications problem-specific-report]] + err-fn-best + population + [:generation index]] + (problem-specific-report err-fn-best + population + index + error-function + report-simplifications)) + +(defnk best [psr-best err-fn-best] + (if (:program psr-best) + psr-best + err-fn-best)) + +(defnk cosmos-data [population] + (let [quants (config/quantiles (count population))] + (zipmap quants + (map #(:total-error (nth (sort-by :total-error population) %)) + quants)))) + +(defnk error-by-case-mean [population-errors] + (apply map (fn [& args] (*' 1.0 (util/mean args))) + population-errors)) + +(defnk error-by-case-min [population-errors] + (apply map (fn [& args] (apply min args)) + population-errors)) + +(defnk population-meta-errors [input-population] + (doall (map :meta-errors input-population))) + +(defnk meta-error-by-category-mean [population-meta-errors] + (apply map (fn [& args] (*' 1.0 (util/mean args))) + population-meta-errors)) + +(defnk meta-error-by-category-min [population-meta-errors] + (apply map (fn [& args] (apply min args)) + population-meta-errors)) + + +(def stats-graph + (graph/graph + :n (fnk [xs] (count xs)) + :min (fnk [xs] (* 1.0 (apply min xs))) + :max (fnk [xs] (* 1.0 (apply max xs))) + :mean (fnk [xs] (* 1.0 (util/mean xs))) + :median (fnk [quartiles] (util/median quartiles)) + :sorted (fnk [xs] (sort xs)) + :first-quartile (fnk [sorted n] ((util/truncate (/ n 4)) sorted)) + :third-quartile (fnk [quartiles n] ((util/truncate (/ (* 3 n) 4)) sorted)) + :standard-deviation + (fnk [mean n xs] + (Math/sqrt + (/ (apply +' (map #(* (- % mean) (- % mean)) + xs)) + (dec n)))))) + +(defn population-stats [attribute] + (graph/graph + :xs (fnk [population] + (doall (map attribute population))) + stats-graph)) + +(defn diversity [attribute] + (graph/graph + :frequencies-map (fnk [population] + (frequencies (map attribute population))) + :frequency-stats + (graph/graph + :xs (fnk [frequencies-map] (doall (vals frequencies-map))) + stats-graph) + :percent-unique + (fnk [population item->counts] + (float (/ (count item->counts) (count population)))))) + +(defn sample-population-edit-distance + "Returns a sample of Levenshtein distances between programs in the population, + where each is divided by the length of the longer program." + [pop samples] + (let [instr-programs (map #(map :instruction %) + (map :genome pop))] + (repeatedly samples + #(let [prog1 (random/lrand-nth instr-programs) + prog2 (random/lrand-nth instr-programs) + longer-length (max (count prog1) (count prog2))] + (if (zero? longer-length) + 0 + (float (/ (util/levenshtein-distance prog1 prog2) + longer-length))))))) + +(def homology-stats-graph + (graph/graph + :xs (fnk [population] + (doall (sample-population-edit-distance population 1000))) + stats-graph)) + +(defnk selection-counts-sorted [selection-counts [:config [:argmap population-size]]] + (sort > (concat (vals @selection-counts) + (repeat (- population-size (count @selection-counts)) 0)))) + +(defnk reset-selection-counts! [selection-counts] + (reset! selection-counts {})) + +(defnk non-diversifying-n [population] + (count (filter :is-random-replacement population))) + +(defnk evaluations-count [evaluations-count-input] + evaluations-count-input) + +(defnk point-evaluations-before-report [point-evaluations-before-report-input] + point-evaluations-before-report-input) + +(defnk reset-point-evaluations-count! [point-evaluations-count point-evaluations-before-report] + (reset! point-evaluations-count point-evaluations-before-report)) + +(defnk timing-map [timing-map-atom] + @timing-map-atom) + +(defnk timing-map-total [timing-map] + (apply + (vals timing-map))) + +(defnk timing-map-total-seconds [timing-map-total] + (/ timing-map-total 1000)) + +(defnk timing-map-seconds [timing-map] + (map-vals timing-map #(/ % 1000))) + +(defnk timing-map-percent [timing-map timing-map-total] + (map-vals timing-map #(* 100.0 (/ % timing-map-total)))) + +(defnk outcome + [[:config [:argmap exit-on-success error-threshold max-generations max-point-evaluations]] + best + index + point-evaluations-count] + (cond (and exit-on-success + (or (<= (:total-error best) error-threshold) + (:success best))) :success + (>= index max-generations) :failure + (>= @point-evaluations-count max-point-evaluations) :failure + :else :continue)) + +(defnk problem-specific-report-final-simplified-best + [[:config [:argmap problem-specific-report + error-function + report-simplifications]] + best + index] + (problem-specific-report (:final-simplification best) [] index error-function report-simplifications)) + + +(def compute-graph + (plumbing.graph/graph + input-population + population-errors + min-error-by-case + population + error-frequencies-by-case + :lexicase lexicase/compute-graph + ifs-best + err-fn + sorted + err-fn-best + psr-best + best + cosmos-data + error-by-case-mean + error-by-case-min + population-meta-errors + meta-error-by-category-mean + meta-error-by-category-min + :total-error-stats (population-stats :total-error) + :genome-size-stats (population-stats :genome-size) + :program-size-stats (population-stats :program-size) + :program-percent-parens-stats (population-stats :program-percent-parens) + :age-stats (population-stats :age) + :grain-size-stats (population-stats :grain-size-stats) + :homology-stats homology-stats-graph + :genome-diversity (diversity :genome) + :program-diversity (diversity :program) + :errors-diversity (diversity :errors) + :total-error-diversity (diversity :total-error) + :behaviors-diversity (diversity :behaviors) + selection-counts-sorted + reset-selection-counts! + non-diversifying-n + evaluations-count + point-evaluations-before-report + reset-point-evaluations-count! + timing-map + timing-map-total + timing-map-total-seconds + timing-map-seconds + timing-map-percent + outcome + problem-specific-report-final-simplified-best)) diff --git a/src/clojush/log/events/generation/individual.clj b/src/clojush/log/events/generation/individual.clj new file mode 100644 index 000000000..916f7a281 --- /dev/null +++ b/src/clojush/log/events/generation/individual.clj @@ -0,0 +1,91 @@ +;; used in the population attribute of the generation +(ns clojush.log.events.generation.individual + (:require [plumbing.core :refer [defnk]] + [plumbing.graph :as graph] + [clojush.util :as util] + [clojush.simplification :refer [auto-simplify]])) + +(defnk program-size [[:individual program]] + (util/count-points program)) + +(defnk program-str [[:individual program]] + (if (and (seq? program) + (empty? program)) + "()" + (str program))) + +(defnk program-pr-str [[:individual program]] + (pr-str program)) + +(defnk program-n-parens [[:individual program]] + (util/count-parens program)) + +(defnk program-n-points [[:individual program]] + (util/count-points program)) + +(defnk program-percent-parens [[:individual program-n-points program-n-parens]] + (double (/ program-n-parens + program-n-points))) + +(defnk genome-size [[:individual genome]] + (count genome)) + +(defnk genome-str [[:individual genome]] + (if (empty? genome) + "()" + (str genome))) + +(defnk genome-without-uuid-pr-str [[:individual genome]] + (pr-str (util/not-lazy (map #(dissoc % :uuid :parent-uuid) genome)))) + +(defnk parent-uuids-str [[:individual parent-uuids]] + (map str parent-uuids)) + +(defnk error-mean [[:individual total-error errors]] + (float (/ total-error + (count errors)))) + +(defnk partial-simplification-program-pr-str + [individual + [:argmap report-simplifications error-function]] + (pr-str (util/not-lazy (:program (auto-simplify individual + error-function + report-simplifications + false + 1000))))) + +(defnk final-simplification + [individual + [:argmap error-function final-report-simplifications]] + (auto-simplify individual + error-function + final-report-simplifications + true + 500)) + +(defnk n-elite-cases + [[:individual errors] + min-error-by-case] + (apply + (map #(if (== %1 %2) 1 0) + errors + min-error-by-case))) + +(def compute-graph + (graph/graph + program-size + program-str + program-pr-str + program-n-parens + program-n-points + program-percent-parens + genome-size + genome-str + genome-without-uuid-pr-str + parent-uuids-str + error-mean + partial-simplification-program-pr-str + final-simplification + n-elite-cases)) + +(def ->individual + (graph/lazy-compile compute-graph)) diff --git a/src/clojush/log/events/generation/lexicase.clj b/src/clojush/log/events/generation/lexicase.clj new file mode 100644 index 000000000..1fd10b87f --- /dev/null +++ b/src/clojush/log/events/generation/lexicase.clj @@ -0,0 +1,57 @@ +(ns clojush.log.events.generation.lexicase + (:require [plumbing.core :refer [defnk]] + [plumbing.graph :as graph])) + +(defnk best-individual [min-error-by-case population] + (apply max-key + (fn [ind] + (apply + (map #(if (== %1 %2) 1 0) + (:errors ind) + min-error-by-case))) + population)) + +(defnk most-zero-cases-best-individual [population] + (apply max-key + (fn [ind] + (apply + (map #(if (zero? %) 1 0) + (:errors ind)))) + population)) + +(defnk pop-elite-by-case [min-error-by-case population-errors] + (doall + (map (fn [errors] + (map #(if (== %1 %2) 1 0) + errors + min-error-by-case)) + population-errors))) + +(defnk count-elites-by-case [pop-elite-by-case] + (doall + (map #(apply + %) (apply mapv vector pop-elite-by-case)))) + +(defnk pop-zero-by-case [population-errors] + (doall + (map (fn [errors] + (map #(if (zero? %) 1 0) + errors)) + population-errors))) + +(defnk count-zero-by-case [pop-zero-by-case] + (doall (map #(apply + %) (apply mapv vector pop-zero-by-case)))) + +(defnk mean-n-elite-cases [count-elites-by-case population] + (float (/ (apply + count-elites-by-case) (count population)))) + +(defnk mean-n-zero-cases [count-zero-by-case population] + (float (/ (apply + count-zero-by-case) (count population)))) + +(def compute-graph + (graph/graph + best-individual + most-zero-cases-best-individual + pop-elite-by-case + count-elites-by-case + pop-zero-by-case + count-zero-by-case + mean-n-elite-cases + mean-n-zero-cases)) diff --git a/src/clojush/log/events/init.clj b/src/clojush/log/events/init.clj new file mode 100644 index 000000000..e83a30ca4 --- /dev/null +++ b/src/clojush/log/events/init.clj @@ -0,0 +1,24 @@ +(ns clojush.log.events.init + (:require [plumbing.core :refer [defnk]] + [plumbing.graph :as graph] + [clj-random.core :as random])) + +(defnk problem-file [args] + (first args)) + +(defnk args-str [args] + (apply str (interpose \space args))) + +(defnk params-as-map [params] + (into (sorted-map) params)) + + +(defnk argmap-with-random-str [params-as-map] + (update params-as-map :random-seed random/seed-to-string)) + +(def compute-graph + (graph/graph + problem-file + args-str + params-as-map + argmap-with-random-str)) diff --git a/src/clojush/log/handlers.clj b/src/clojush/log/handlers.clj new file mode 100644 index 000000000..f5b1839ec --- /dev/null +++ b/src/clojush/log/handlers.clj @@ -0,0 +1,13 @@ +(ns clojush.log.handlers + (:require [clojush.log.handlers.csv] + [clojush.log.handlers.json] + [clojush.log.handlers.edn] + [clojush.log.handlers.text] + [clojush.log.handlers.remote])) + +(def handlers + [clojush.log.handlers.csv/handler + clojush.log.handlers.json/handler + clojush.log.handlers.edn/handler + clojush.log.handlers.text/handler + clojush.log.handlers.remote/handler]) diff --git a/src/clojush/log/handlers/csv.clj b/src/clojush/log/handlers/csv.clj new file mode 100644 index 000000000..69c7c6219 --- /dev/null +++ b/src/clojush/log/handlers/csv.clj @@ -0,0 +1,47 @@ +(ns clojush.log.handlers.csv + (:require [plumbing.core :refer [defnk]] + [clojure.java.io :as io] + [clojure-csv.core :as csv])) + + +(defnk handle-generation + [[:config [:argmap csv-log-filename csv-columns print-csv-logs]] + generation] + (when print-csv-logs + (let [population (:population generation) + columns (concat [:uuid] + (filter #(some #{%} csv-columns) + [:generation :location :parent-uuids :genetic-operators + :push-program-size :plush-genome-size :push-program + :plush-genome :total-error :is-random-replacement]))] + (when (zero? (:index generation)) + (with-open [csv-file (io/writer csv-log-filename :append false)] + (csv/write-csv csv-file + (vector (concat (map name columns) + (when (some #{:test-case-errors} csv-columns) + (map #(str "TC" %) + (range (count (:errors (first population))))))))))) + (with-open [csv-file (io/writer csv-log-filename :append true)] + (csv/write-csv + csv-file + (map-indexed + (fn [location individual] + (concat (map (assoc (into {} individual) + :generation (:index generation) + :location location + :parent-uuids (:parent-uuids-str individual) + :genetic-operators (if (nil? (:genetic-operators individual)) + [] + (:genetic-operators individual)) + :push-program-size (:program-n-points individual) + :push-program (:program-str individual) + :plush-genome-size (:genome-size individual) + :plush-genome (:genome-str individual)) + ; This is a map of an individual + columns) + (when (some #{:test-case-errors} csv-columns) + (:errors individual)))) + population)))))) + +(def handler + {:generation handle-generation}) diff --git a/src/clojush/log/handlers/edn.clj b/src/clojush/log/handlers/edn.clj new file mode 100644 index 000000000..6aa8fd5c0 --- /dev/null +++ b/src/clojush/log/handlers/edn.clj @@ -0,0 +1,35 @@ +(ns clojush.log.handlers.edn + (:require [plumbing.core :refer [defnk]] + [clojure.java.io :as io])) + +(defnk handle-config [[:config argmap]] + (when (:print-edn-logs argmap) + (with-open [w (io/writer (:edn-log-filename argmap) :append false)] + (.write w "#clojush/run") + (.write w (prn-str (dissoc argmap + ;; These keys have functions + :atom-generators + :error-function + :problem-specific-report + :random-seed)))))) + +(defnk handle-generation + [[:config argmap] + generation] + (when (:print-edn-logs argmap) + (with-open [w (io/writer (:edn-log-filename argmap) :append true)] ;; Opens and closes the file once per call + (doall + (map-indexed (fn [index individual] + (let [additional-data {:generation (:index generation) + :location index + :push-program-size (:program-size individual) + :plush-genome-size (:genome-size individual)}] + (.write w "#clojush/individual") + (.write w (prn-str (merge + (select-keys additional-data (:edn-additional-keys argmap)) + (select-keys individual (:edn-keys argmap))))))) + (:population generation)))))) + +(def handler + {:config handle-config + :generation handle-generation}) diff --git a/src/clojush/log/handlers/json.clj b/src/clojush/log/handlers/json.clj new file mode 100644 index 000000000..e27fe61ae --- /dev/null +++ b/src/clojush/log/handlers/json.clj @@ -0,0 +1,37 @@ +(ns clojush.log.handlers.json + (:require [plumbing.core :refer [defnk]] + [clojure.data.json :refer [json-str]])) + +(defn jsonize-individual + "Takes an individual and returns it with only the items of interest + for the json logs." + [log-fitnesses-for-all-cases json-log-program-strings index individual] + (cond-> {:total-error (:total-error individual) + :generation index + :size (:program-size individual)} + log-fitnesses-for-all-cases (assoc :errors (:errors individual)) + json-log-program-strings (assoc :program (:program-str individual)) + (:weighted-error individual) (assoc :weighted-error (:weighted-error individual)))) + + + + +(defnk handle-generation + [[:config [:argmap json-log-filename + print-json-logs + log-fitnesses-for-all-cases + json-log-program-strings]] + [:generation index :as generation]] + (when print-json-logs + (let [pop-json-string (json-str (map #(jsonize-individual + log-fitnesses-for-all-cases + json-log-program-strings + index + %) + (:population generation)))] + (if (zero? index) + (spit json-log-filename (str pop-json-string "\n") :append false) + (spit json-log-filename (str "," pop-json-string "\n") :append true))))) + +(def handler + {:generation handle-generation}) diff --git a/src/clojush/log/handlers/remote.clj b/src/clojush/log/handlers/remote.clj new file mode 100644 index 000000000..1a27a7315 --- /dev/null +++ b/src/clojush/log/handlers/remote.clj @@ -0,0 +1,70 @@ +(ns clojush.log.handlers.remote + (:require [plumbing.core :refer [defnk]] + [clojure.java.io] + [cheshire.core] + [cheshire.generate] + [clojure.string]) + (:import java.net.Socket)) + +;; write functions as strings +(cheshire.generate/add-encoder + clojure.lang.AFunction + cheshire.generate/encode-str) + +(def hostname-and-port (atom nil)) +(def writer (atom nil)) + +(defn ->writer + ; https://github.com/clojure-cookbook/clojure-cookbook/blob/master/05_network-io/5-09_tcp-client.asciidoc + [] + (let [[hostname port] @hostname-and-port] + (-> (java.net.Socket. hostname port) + clojure.java.io/writer))) + +(defn set-writer! + ; Tries to get a writer to send data on, and if it fails, retries every + ; 5 seconds + [] + (println "Trying to connect to external server for recording at " @hostname-and-port "...") + (try + (reset! writer (->writer)) + (catch java.net.ConnectException _ + (Thread/sleep 5000) + (set-writer!)))) + +(defn host! [host-str] + (let [[hostname port-str] (clojure.string/split host-str #":")] + (reset! hostname-and-port [hostname (int (bigint port-str))]) + (set-writer!))) + +(defn write-data! [data] + (when (some? @hostname-and-port) + (println "Trying to record data to external server...") + (try + (do + (cheshire.core/generate-stream data @writer) + (.newLine @writer) + (.flush @writer)) + (catch java.net.SocketException _ + (set-writer!) + (write-data! data))))) + + +(defnk handle-config + [init config] + (let [host (:record-host (:params config))] + (when host + (host! host)))) + +(defnk handle-generation [config generation] + (write-data! + (assoc generation + ; don't include params or args in report, because most of the info is + ; already in push-argmap and program-file + :config (dissoc config :params :args :run-uuid) + :config-uuid (:run-uuid config) + :individuals (map #(dissoc % :program) (:individuals generation))))) + +(def handler + {:config handle-config + :generation handle-generation}) diff --git a/src/clojush/log/handlers/text.clj b/src/clojush/log/handlers/text.clj new file mode 100644 index 000000000..9247c23a8 --- /dev/null +++ b/src/clojush/log/handlers/text.clj @@ -0,0 +1,252 @@ +(ns clojush.log.handlers.text + (:require [plumbing.core :refer [defnk]] + [clj-random.core :as random] + [clojush.util :refer [not-lazy]])) + +(defn print-params [push-argmap] + (doseq [[param val] push-argmap] + (println (name param) "=" val))) + +(defnk handle-init [init] + (println "Command line args:" (:args-str init)) + (println "######################################") + (println "Parameters set at command line or in problem file argmap; may or may not be default:") + (print-params (:argmap-with-random-str init)) + (println "######################################")) + +(defnk handle-config [config] + ((:problem-specific-initial-report config)) + (println "Registered instructions:" (:registered-instructions config)) + (println "Starting PushGP run.") + (if (empty? (:clojush-version config)) + (println "version number unavailable") + (println "Clojush version = " (:clojush-version config))) + ;; NOTES: - Last commit hash will only be correct if this code has + ;; been committed already. + ;; - GitHub link will only work if commit has been pushed + ;; to GitHub. + (let [git-hash-str (if (empty? (:git-hash config)) "unavailable" (:git-hash config))] + (println "Hash of last Git commit =" git-hash-str) + (println + (str + "GitHub link = https://github.com/lspector/Clojush/commit/" + git-hash-str))) + (print-params (:argmap-with-random-str config))) + + +(defnk lexicase-report + "This extra report is printed whenever lexicase selection is used." + [[:generation lexicase] + [:config [:argmap report-simplifications + print-errors + meta-error-categories + print-history]]] + + (doseq [[individual short-name header-name] [[(:best-individual lexicase) + "Lexicase" + "Elite"] + [(:most-zero-cases-best-individual lexicase) + "Zero cases" + "Zero"]]] + (println "--- Lexicase Program with Most" header-name "Cases Statistics ---") + (println short-name "best genome:" (:genome-without-uuid-pr-str individual)) + (println short-name "best program:" (:program-pr-str individual)) + (when (> report-simplifications 0) + (println short-name "best partial simplification:" + (:partial-simplification-program-pr-str individual))) + (when print-errors (println short-name "best errors:" (not-lazy (:errors individual)))) + (when (and print-errors (not (empty? meta-error-categories))) + (println short-name "best meta-errors:" (not-lazy (:meta-errors individual)))) + (println short-name "best number of elite cases:" (:n-elite-cases individual)) + (println short-name "best total error:" (:total-error individual)) + (println short-name "best mean error:" (:error-mean individual)) + (when print-history (println short-name "best history:" (not-lazy (:history individual)))) + (println short-name "best size:" (:program-size individual)) + (printf "Percent parens: %.3f\n" + (:program-n-points individual))) ;Number of (open) parens / points + + (println "--- Lexicase Population Statistics ---") + (println "Count of elite individuals by case:" (:count-elites-by-case lexicase)) + (println (format "Population mean number of elite cases: %.2f" + (:mean-n-elite-cases lexicase))) + (println "Count of perfect (error zero) individuals by case:" + (:count-zero-by-case lexicase)) + (println (format "Population mean number of perfect (error zero) cases: %.2f" + (:mean-n-zero-cases lexicase)))) + +(defnk implicit-fitness-sharing-report + [[:generation ifs-best] + [:config [:argmap print-errors meta-error-categories]]] + + (println "--- Program with Best Implicit Fitness Sharing Error Statistics ---") + (println "IFS best genome:" (:genome-without-uuid-pr-str ifs-best)) + (println "IFS best program:" (:program-pr-str ifs-best)) + (when print-errors (println "IFS best errors:" (not-lazy (:errors ifs-best)))) + (when (and print-errors (not (empty? meta-error-categories))) + (println "IFS best meta-errors:" (not-lazy (:meta-errors ifs-best)))) + (println "IFS best total error:" (:total-error ifs-best)) + (println "IFS best mean error:" (:error-mean ifs-best)) + (println "IFS best IFS error:" (:weighted-error ifs-best)) + (println "IFS best size:" (:program-n-points ifs-best)) + (printf "IFS best percent parens: %.3f\n" + (:program-percent-parens ifs-best))) + +(defnk handle-generation + [[:config argmap :as config] + [:generation best err-fn :as generation]] + (when (:print-error-frequencies-by-case argmap) + (println "Error frequencies by case:" + (:error-frequencies-by-case generation))) + (when (some #{(:parent-selection argmap)} + #{:lexicase :elitegroup-lexicase :leaky-lexicase :epsilon-lexicase + :random-threshold-lexicase}) + (lexicase-report + {:config config + :generation generation})) + (when (= (:total-error-method argmap) :ifs) + (implicit-fitness-sharing-report {:generation generation :config config})) + (println (format "--- Best Program (%s) Statistics ---" (str "based on " (name err-fn)))) + (println "Best genome:" (:genome-without-uuid-pr-str best)) + (println "Best program:" (:program-pr-str best)) + (when (> (:report-simplifications argmap) 0) + (println "Partial simplification:" + (:partial-simplification-program-pr-str best))) + (when (:print-errors argmap) (println "Errors:" (not-lazy (:errors best)))) + (when (and (:print-errors argmap) (not (empty? (:meta-error-categories argmap)))) + (println "Meta-Errors:" (not-lazy (:meta-errors best)))) + (println "Total:" (:total-error best)) + (println "Mean:" (:error-mean best)) + (when (not= (:normalization argmap) :none) + (println "Normalized error:" (:normalized-error best))) + (case (:total-error-method argmap) + :hah (println "HAH-error:" (:weighted-error best)) + :rmse (println "RMS-error:" (:weighted-error best)) + :ifs (println "IFS-error:" (:weighted-error best)) + nil) + (when (:print-history argmap) (println "History:" (not-lazy (:history best)))) + (when (= (:parent-selection argmap) :novelty-search) + (println "Novelty: " (float (:novelty best)))) + (println "Genome size:" (:genome-size best)) + (println "Size:" (:program-size best)) + (printf "Percent parens: %.3f\n" + (:program-percent-parens best)) ;Number of (open) parens / points + (println "--- Population Statistics ---") + (when (:print-cosmos-data argmap) + (println "Cosmos Data:" (:cosmos-data generation))) + (println "Average total errors in population:" + (-> generation :total-error-stats :mean)) + (println "Median total errors in population:" + (-> generation :total-error-stats :median)) + (when (:print-errors argmap) (println "Error averages by case:") + (:error-by-case-mean generation)) + (when (:print-errors argmap) (println "Error minima by case:") + (:error-by-case-min generation)) + (when (and (:print-errors argmap) (not (empty? (:meta-error-categories argmap)))) + (println "Meta-Error averages by category:" + (:meta-error-by-category-mean generation)) + (println "Meta-Error minima by category:" + (:meta-error-by-category-min generation))) + (println "Average genome size in population (length):" + (-> generation :genome-size-stats :mean)) + (println "Average program size in population (points):" + (get-in generation [:program-size-stats :mean])) + (printf "Average percent parens in population: %.3f\n" + (-> generation :program-percent-parens :mean)) + (let [stats (:age-stats generation)] + (println "Minimum age in population:" (:min stats)) + (println "Maximum age in population:" (:max stats)) + (println "Average age in population:" (:mean stats)) + (println "Median age in population:" (:median stats))) + (let [stats (:grain-size-stats generation)] + (println "Minimum grain-size in population:" (:min stats)) + (println "Maximum grain-size in population:" (:max stats)) + (println "Average grain-size in population:" (:mean stats)) + (println "Median grain-size in population:" (:median stats))) + (println "--- Population Diversity Statistics ---") + (let [diversity (:genome-diversity generation)] + (println "Min copy number of one Plush genome:" + (-> diversity :frequency-stats :min)) + (println "Median copy number of one Plush genome:" + (-> diversity :frequency-stats :median)) + (println "Max copy number of one Plush genome:" + (-> diversity :frequency-stats :max)) + (println "Genome diversity (% unique Plush genomes):\t" + (:percent-unique diversity))) + (let [diversity (:program-diversity generation)] + (println "Min copy number of one Push program:" + (-> diversity :frequency-stats :min)) + (println "Median copy number of one Push program:" + (-> diversity :frequency-stats :median)) + (println "Max copy number of one Push program:" + (-> diversity :frequency-stats :max)) + (println "Syntactic diversity (% unique Push programs):\t" + (:percent-unique diversity))) + (println "Total error diversity:\t\t\t\t" + (get-in generation [:total-error-diversity :percent-unique])) + (println "Error (vector) diversity:\t\t\t" + (get-in generation [:errors-diversity :percent-unique])) + (when (not (nil? (:behaviors (first (:population generation))))) + (println "Behavioral diversity:\t\t\t\t" + (get-in generation [:behaviors-diversity :percent-unique]))) + (when (:print-homology-data argmap) + (let [stats (:homology-stats generation)] + (println "--- Population Homology Statistics (all stats reference the sampled population edit distance of programs) ---") + (println "Number of homology samples:" (:n stats)) + (println "Average: " (:mean stats)) + (println "Standard deviation: " (:standard-deviation stats)) + (println "First quartile: " (:first-quart stats)) + (println "Median: " (:median stats)) + (println "Third quartile: " (:third-quart stats)))) + + (when (:print-selection-counts argmap) + (println "Selection counts:" + (:selection-counts-sorted generation)) + ((:reset-selection-counts! generation))) + (when (:autoconstructive argmap) + (println "Number of random replacements for non-diversifying individuals:" + (:non-diversifying-n generation))) + (println "--- Run Statistics ---") + (println "Number of program evaluations used so far:" + @(:evaluations-count generation)) + (println "Number of point (instruction) evaluations so far:" + (:point-evaluations-before-report generation)) + ((:reset-point-evaluations-count! generation)) + (println "--- Timings ---") + (println "Current time:" (System/currentTimeMillis) "milliseconds") + (when (:print-timings argmap) + (let [seconds (:timing-map-seconds generation) + percent (:timing-map-percent generation)] + (printf "Total Time: %8.1f seconds\n" + (:timing-map-total-seconds generation)) + (printf "Initialization: %8.1f seconds, %4.1f%%\n" + (:initialization seconds) (:initialization percent)) + (printf "Reproduction: %8.1f seconds, %4.1f%%\n" + (:reproduction seconds) (:reproduction percent)) + (printf "Fitness Testing: %8.1f seconds, %4.1f%%\n" + (:fitness seconds) (:fitness percent)) + (printf "Report: %8.1f seconds, %4.1f%%\n" + (:report seconds) (:report percent)) + (printf "Other: %8.1f seconds, %4.1f%%\n" + (:other seconds) (:other percent)))) + (println ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") + (println ";; -*- End of report for generation" (:index generation)) + (println ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") + (flush) + (when (= :continue (:outcome generation)) + (let [best (:best generation)] + (printf "\n\nSUCCESS at generation %s\nSuccessful program: %s\nErrors: %s\nTotal error: %s\nHistory: %s\nSize: %s\n\n" + (:index generation) (:program-str best) (:errors best) (:total-error best) + (:history best) (:program-n-points best)) + (when (:print-ancestors-of-solution argmap) + (printf "\nAncestors of solution:\n") + (prn (:ancestors best))) + (println "\n;;******************************") + (println ";; Problem-Specific Report of Simplified Solution") + (:problem-specific-report-final-simplified-best generation))) + (when (= :failure (:outcome generation)) + (printf "\nFAILURE\n"))) + +(def handler + {:init handle-init + :config handle-config + :generation handle-generation}) diff --git a/src/clojush/pushgp/pushgp.clj b/src/clojush/pushgp/pushgp.clj index 2bbb3422c..057d2ef46 100644 --- a/src/clojush/pushgp/pushgp.clj +++ b/src/clojush/pushgp/pushgp.clj @@ -2,12 +2,12 @@ (:require [clojure.java.io :as io] [clj-random.core :as random] [clojure.repl :as repl] - [clojush.pushgp.record :as r]) + [clojush.log :refer [log! label->computed]]) (:use [clojush args globals util pushstate random individual evaluate simplification translate] [clojush.instructions boolean code common numbers random-instructions string char vectors tag zip return input-output genome] - [clojush.pushgp breed report] - [clojush.pushgp.selection + [clojush.pushgp breed] + [clojush.pushgp.selection selection epsilon-lexicase elitegroup-lexicase implicit-fitness-sharing novelty] [clojush.experimental.decimation])) @@ -40,7 +40,7 @@ (let [population-agents (repeatedly population-size #(make-individual :genome (strip-random-insertion-flags - (random-plush-genome + (random-plush-genome max-genome-size-in-initial-program atom-generators argmap)) @@ -56,8 +56,8 @@ [{:keys [use-single-thread population-size]}] (vec (repeatedly population-size #((if use-single-thread atom agent) - (make-individual) - :error-handler agent-error-handler)))) + (make-individual) + :error-handler agent-error-handler)))) (defn make-rng "Creates the random number generators used by the agents in the population. @@ -66,8 +66,8 @@ (let [random-seeds (loop [seeds '()] (let [num-remaining (- population-size (count seeds))] (if (pos? num-remaining) - (let [new-seeds (repeatedly num-remaining - #(random/lrand-bytes + (let [new-seeds (repeatedly num-remaining + #(random/lrand-bytes (:mersennetwister random/*seed-length*)))] (recur (concat seeds (filter ; only add seeds that we do not already have (fn [candidate] @@ -76,20 +76,20 @@ seeds)))] {:random-seeds random-seeds :rand-gens (vec (doall (for [k (range population-size)] - (random/make-mersennetwister-rng (nth random-seeds k))))) - })) + (random/make-mersennetwister-rng (nth random-seeds k)))))})) + (defn compute-errors [pop-agents rand-gens {:keys [use-single-thread error-function] :as argmap}] (dorun (map #((if use-single-thread swap! send) - % evaluate-individual error-function %2 argmap) + % evaluate-individual error-function %2 argmap) pop-agents rand-gens)) (when-not use-single-thread (apply await pop-agents))) ;; SYNCHRONIZE (defn produce-new-offspring [pop-agents child-agents rand-gens - {:keys [decimation-ratio population-size decimation-tournament-size use-single-thread ]}] + {:keys [decimation-ratio population-size decimation-tournament-size use-single-thread]}] (let [pop (if (>= decimation-ratio 1) (vec (doall (map deref pop-agents))) (decimate (vec (doall (map deref pop-agents))) @@ -100,16 +100,16 @@ (reset! max-age (apply max ages)) (dotimes [i population-size] ((if use-single-thread swap! send) - (nth child-agents i) - breed - i (nth rand-gens i) pop @push-argmap))) + (nth child-agents i) + breed + i (nth rand-gens i) pop @push-argmap))) (when-not use-single-thread (apply await child-agents))) ;; SYNCHRONIZE (defn install-next-generation [pop-agents child-agents {:keys [population-size use-single-thread]}] (dotimes [i population-size] ((if use-single-thread swap! send) - (nth pop-agents i) (fn [av] (deref (nth child-agents i))))) + (nth pop-agents i) (fn [av] (deref (nth child-agents i))))) (when-not use-single-thread (apply await pop-agents))) ;; SYNCHRONIZE (defn check-genetic-operator-probabilities-add-to-one @@ -142,21 +142,18 @@ (defn pushgp "The top-level routine of pushgp." ([] (pushgp '())) - ([args] + ( [args] (reset! timer-atom (System/currentTimeMillis)) (load-push-argmap args) - (when (some? (:record-host @push-argmap)) - (r/host! (str (:record-host @push-argmap)))) (random/with-rng (random/make-mersennetwister-rng (:random-seed @push-argmap)) ;; set globals from parameters (reset-globals) - (initial-report @push-argmap) ;; Print the inital report - (r/uuid! (:run-uuid @push-argmap)) - (print-params (r/config-data! [:argmap] (dissoc @push-argmap :run-uuid))) (check-genetic-operator-probabilities-add-to-one @push-argmap) (timer @push-argmap :initialization) - (when (:print-timings @push-argmap) - (r/config-data! [:initialization-ms] (:initialization @timer-atom))) + (log! :config + {:argmap-input @push-argmap + :timing-map @timing-map + :registered-instructions-input @registered-instructions}) (println "\n;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") (println "\nGenerating initial population...") (flush) (let [pop-agents (make-pop-agents @push-argmap) @@ -168,7 +165,6 @@ ;; Main loop (loop [generation 0 novelty-archive '()] - (r/new-generation! generation) (println "Processing generation:" generation) (flush) (population-translate-plush-to-push pop-agents @push-argmap) (timer @push-argmap :reproduction) @@ -193,35 +189,37 @@ (calculate-novelty pop-agents novelty-archive @push-argmap)) (timer @push-argmap :other) ;; report and check for success - (let [[outcome best] (report-and-check-for-success (vec (doall (map deref pop-agents))) - generation @push-argmap)] - (r/generation-data! [:outcome] outcome) - (r/end-generation!) - (cond (= outcome :failure) (do (printf "\nFAILURE\n") - (if (:return-simplified-on-failure @push-argmap) - (auto-simplify best - (:error-function @push-argmap) - (:final-report-simplifications @push-argmap) - true - 500) - (flush))) - (= outcome :continue) (let [next-novelty-archive (concat novelty-archive - (select-individuals-for-novelty-archive - (map deref pop-agents) - @push-argmap))] - (timer @push-argmap :report) - (println "\nProducing offspring...") (flush) - (produce-new-offspring pop-agents - child-agents - rand-gens - @push-argmap) - (println "Installing next generation...") (flush) - (install-next-generation pop-agents child-agents @push-argmap) - (recur (inc generation) - next-novelty-archive)) - :else (final-report generation best @push-argmap)))))))) - - - - - + (log! :generation + {:index generation + :pop-agents pop-agents + :selection-counts selection-counts + :evaluations-count-input evaluations-count + :point-evaluations-before-report-input @point-evaluations-count + :point-evaluations-count point-evaluations-count + :timing-map-atom timing-map}) + ;; we let the logging framework generate the outcome and best program, so that the best program + ;; can have the dynamically generated attributes attached to it like :final-simplification + ;; and other things used in the logging + (let [outcome (get-in @label->computed [:generation :outcome])] + (cond + (= outcome :failure) + (if (:return-simplified-on-failure @push-argmap) + (get-in @label->computed [:generation :best :final-simplification]) + (flush)) + (= outcome :success) + (get-in @label->computed [:generation :problem-specific-report-final-simplified-best]) + (= outcome :continue) + (let [next-novelty-archive (concat novelty-archive + (select-individuals-for-novelty-archive + (map deref pop-agents) + @push-argmap))] + + (timer @push-argmap :report) + (println "\nProducing offspring...") (flush) + (produce-new-offspring pop-agents + child-agents + rand-gens + @push-argmap) + (println "Installing next generation...") (flush) + (install-next-generation pop-agents child-agents @push-argmap) + (recur (inc generation) next-novelty-archive))))))))) diff --git a/src/clojush/pushgp/record.clj b/src/clojush/pushgp/record.clj deleted file mode 100644 index 86cc26feb..000000000 --- a/src/clojush/pushgp/record.clj +++ /dev/null @@ -1,114 +0,0 @@ -;;; Records the results of runs to an external server - -;; Use documented in https://push-language.hampshire.edu/t/recording-and-analyzing-experimental-results/830 - -;; If `record-host` is set in the arguments, then we should send -;; send data about each run, as it progresses, to that host for archival -;; and monitoring purposes. - -;; The functions in this file are stateful and should be called in this order: -;; -;; (new-run! uuid! config-data!* (new-generation! generation-data!* end-generation!)*)* -;; -;; Currently it doesn't enforce this and if you call a method when you shouldn't -;; the results are unkown. -;; Also it will not send anything over the network until `host!` is called, -;; before that, `end-generation!` will be a no-op. - -(ns clojush.pushgp.record - (:require [clojure.java.io] - [cheshire.core] - [cheshire.generate] - [clojure.string])) - -;; write functions as strings -(cheshire.generate/add-encoder - clojure.lang.AFunction - cheshire.generate/encode-str) - -(def hostname-and-port (atom nil)) -(def writer (atom nil)) - -(defn- ->writer - ; https://github.com/clojure-cookbook/clojure-cookbook/blob/master/05_network-io/5-09_tcp-client.asciidoc - [] - (let [[hostname port] @hostname-and-port] - (-> (java.net.Socket. hostname port) - clojure.java.io/writer))) - -(defn- set-writer! - ; Tries to get a writer to send data on, and if it fails, retries every - ; 5 seconds - [] - (println "Trying to connect to external server for recording at " @hostname-and-port "...") - (try - (reset! writer (->writer)) - (catch java.net.ConnectException _ - (Thread/sleep 5000) - (set-writer!)))) - -(defn host! [host-str] - (let [[hostname port-str] (clojure.string/split host-str #":")] - (reset! hostname-and-port [hostname (int (bigint port-str))]) - (set-writer!))) - -(defn- write-data! [data] - (when (some? @hostname-and-port) - (println "Trying to record data to external server...") - (try - (do - (cheshire.core/generate-stream data @writer) - (.newLine @writer) - (.flush @writer)) - (catch java.net.SocketException _ - (set-writer!) - (write-data! data))))) - -(def data (atom {})) - - -;; Stores a configuration option for the run, for the sequence of `ks` and value `v` -;; i.e. (config-data! [:git-uuid] "abc-def") -(defn config-data! [ks v] - (swap! data assoc-in (cons :config ks) v) - v) - -(defn seconds-since-epoch - ;; http://stackoverflow.com/a/17432411 - ;; because Spark interprets numbers as dates in this format when in JSON - [] - (quot (System/currentTimeMillis) 1000)) - -;; Resets the run data and saves the start time. Should be called at the -;; begining of a run -(defn new-run! [] - (reset! data {:config {:start-time (seconds-since-epoch)}})) - -(defn uuid! [uuid] - (swap! data assoc :uuid uuid)) - -;; Resets the generation data and should be called at the begining of -;; each generation -(defn new-generation! [index] - (swap! - data - assoc - :index index - :generation {:start-time (seconds-since-epoch)})) - - -;; Stores data about the generation, i.e. -;; (generation-data! [:best :error] [1 2 3 10]) -(defn generation-data! [ks v] - (swap! data assoc-in (cons :generation ks) v) - v) - -;; Sends the data for the current generation over the network to be recorded -;; Also sends the configuration with each generation -(defn end-generation! [] - (let [{:keys [generation uuid index config]} @data] - (write-data! - (assoc generation - :config-uuid uuid - :index index - :config config)))) diff --git a/src/clojush/pushgp/report.clj b/src/clojush/pushgp/report.clj deleted file mode 100644 index 48dd60e23..000000000 --- a/src/clojush/pushgp/report.clj +++ /dev/null @@ -1,615 +0,0 @@ -(ns clojush.pushgp.report - (:use [clojush util globals pushstate simplification individual] - [clojure.data.json :only (json-str)]) - (:require [clojure.string :as string] - [config :as config] - [clj-random.core :as random] - [local-file] - [clojure.data.csv :as csv] - [clojure.java.io :as io] - [clojush.pushgp.record :as r])) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; helper functions - -(defn default-problem-specific-initial-report - "Customize this for your own problem. It will be called at the beginning of the initial report." - [argmap] - :no-problem-specific-initial-report-function-defined) - -(defn default-problem-specific-report - "Customize this for your own problem. It will be called at the beginning of the generational report." - [best population generation error-function report-simplifications] - :no-problem-specific-report-function-defined) - -(defn git-last-commit-hash - "Returns the last Git commit hash" - [] - (let [dir (local-file/project-dir)] - (string/trim - (slurp - (str dir - "/.git/" - (subs - (string/trim - (slurp - (str dir "/.git/HEAD"))) - 5)))))) - -(defn print-params [push-argmap] - (doseq [[param val] push-argmap] - (if (= param :random-seed) - (println (name param) "=" (random/seed-to-string val)) - (println (name param) "=" val)))) - -(defn print-genome [individual] - (pr-str (not-lazy (map #(dissoc % :uuid :parent-uuid) (:genome individual))))) - -(defn behavioral-diversity - "Returns the behavioral diversity of the population, as described by David - Jackson in 'Promoting phenotypic diversity in genetic programming'. It is - the percent of distinct behavior vectors in the population." - [population] - (float (/ (count (distinct (map :behaviors population))) - (count population)))) - -(defn sample-population-edit-distance - "Returns a sample of Levenshtein distances between programs in the population, - where each is divided by the length of the longer program." - [pop samples] - (let [instr-programs (map #(map :instruction %) - (map :genome pop))] - (repeatedly samples - #(let [prog1 (random/lrand-nth instr-programs) - prog2 (random/lrand-nth instr-programs) - longer-length (max (count prog1) (count prog2))] - (if (zero? longer-length) - 0 - (float (/ (levenshtein-distance prog1 prog2) - longer-length))))))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; log printing (csv and json) - -(defn csv-print - "Prints a csv of the population, with each individual's fitness and size. - If log-fitnesses-for-all-cases is true, it also prints the value - of each fitness case." - [population generation {:keys [csv-log-filename csv-columns]}] - (let [columns (concat [:uuid] - (filter #(some #{%} csv-columns) - [:generation :location :parent-uuids :genetic-operators - :push-program-size :plush-genome-size :push-program - :plush-genome :total-error :is-random-replacement]))] - (when (zero? generation) - (with-open [csv-file (io/writer csv-log-filename :append false)] - (csv/write-csv csv-file - (vector (concat (map name columns) - (when (some #{:test-case-errors} csv-columns) - (map #(str "TC" %) - (range (count (:errors (first population))))))))))) - (with-open [csv-file (io/writer csv-log-filename :append true)] - (csv/write-csv - csv-file - (map-indexed - (fn [location individual] - (concat (map (assoc (into {} individual) - :generation generation - :location location - :parent-uuids (let [parent-uuids (not-lazy - (map str (:parent-uuids individual)))] - (if (empty? parent-uuids) - [] - parent-uuids)) - :genetic-operators (if (nil? (:genetic-operators individual)) - [] - (:genetic-operators individual)) - :push-program-size (count-points (:program individual)) - :push-program (if (and (seq? (:program individual)) - (empty? (:program individual))) - "()" - (:program individual)) - :plush-genome-size (count (:genome individual)) - :plush-genome (if (empty? (:genome individual)) - "()" - (not-lazy (:genome individual)))) - ; This is a map of an individual - columns) - (when (some #{:test-case-errors} csv-columns) - (:errors individual)))) - population))))) - -(defn edn-print - "Takes a population and appends all the individuals to the EDN log file. - If the internal representation of individuals changes in future versions - of Clojush, this code will likely continue to work, but will produce - output corresponding to the new representation." - [population generation edn-log-filename keys additional-keys] - (with-open [w (io/writer edn-log-filename :append true)] ;; Opens and closes the file once per call - (doall - (map-indexed (fn [index individual] - (let [additional-data {:generation generation - :location index - :push-program-size (count-points (:program individual)) - :plush-genome-size (count (:genome individual))}] - (.write w "#clojush/individual") - (.write w (prn-str (merge - (select-keys additional-data additional-keys) - (select-keys individual keys)))))) - population)))) - -(defn jsonize-individual - "Takes an individual and returns it with only the items of interest - for the json logs." - [log-fitnesses-for-all-cases json-log-program-strings generation individual] - (let [part1-ind (-> (if log-fitnesses-for-all-cases - {:errors (:errors individual)} - {}) - (assoc :total-error (:total-error individual)) - (assoc :generation generation) - (assoc :size (count-points (:program individual)))) - part2-ind (if json-log-program-strings - (assoc part1-ind :program (str (not-lazy (:program individual)))) - part1-ind) - part3-ind (if (:weighted-error individual) - (assoc part2-ind :weighted-error (:weighted-error individual)) - part2-ind)] - part3-ind)) - -(defn json-print - "Prints a json file of the population, with each individual's fitness and size. - If log-fitnesses-for-all-cases is true, it also prints the value - of each fitness case." - [population generation json-log-filename log-fitnesses-for-all-cases - json-log-program-strings] - (let [pop-json-string (json-str (map #(jsonize-individual - log-fitnesses-for-all-cases - json-log-program-strings - generation - %) - population))] - (if (zero? generation) - (spit json-log-filename (str pop-json-string "\n") :append false) - (spit json-log-filename (str "," pop-json-string "\n") :append true)))) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; report printing functions - -(defn lexicase-report - "This extra report is printed whenever lexicase selection is used." - [population {:keys [error-function report-simplifications print-errors - print-history meta-error-categories]}] - - (let [min-error-by-case (apply map - (fn [& args] (apply min args)) - (map :errors population)) - lex-best (apply max-key - (fn [ind] - (apply + (map #(if (== %1 %2) 1 0) - (:errors ind) - min-error-by-case))) - population) - pop-elite-by-case (map (fn [ind] - (map #(if (== %1 %2) 1 0) - (:errors ind) - min-error-by-case)) - population) - count-elites-by-case (map #(apply + %) (apply mapv vector pop-elite-by-case)) - most-zero-cases-best (apply max-key - (fn [ind] - (apply + (map #(if (zero? %) 1 0) - (:errors ind)))) - population) - pop-zero-by-case (map (fn [ind] - (map #(if (zero? %) 1 0) - (:errors ind))) - population) - count-zero-by-case (map #(apply + %) (apply mapv vector pop-zero-by-case))] - - (println "--- Lexicase Program with Most Elite Cases Statistics ---") - (println "Lexicase best genome:" (print-genome lex-best)) - (println "Lexicase best program:" (pr-str (not-lazy (:program lex-best)))) - (when (> report-simplifications 0) - (println "Lexicase best partial simplification:" - (pr-str (not-lazy (:program (auto-simplify lex-best - error-function - report-simplifications - false - 1000)))))) - (when print-errors (println "Lexicase best errors:" (not-lazy (:errors lex-best)))) - (when (and print-errors (not (empty? meta-error-categories))) - (println "Lexicase best meta-errors:" (not-lazy (:meta-errors lex-best)))) - (println "Lexicase best number of elite cases:" (apply + (map #(if (== %1 %2) 1 0) - (:errors lex-best) - min-error-by-case))) - (println "Lexicase best total error:" (:total-error lex-best)) - (println "Lexicase best mean error:" (float (/ (:total-error lex-best) - (count (:errors lex-best))))) - (when print-history (println "Lexicase best history:" (not-lazy (:history lex-best)))) - (println "Lexicase best size:" (count-points (:program lex-best))) - (printf "Percent parens: %.3f\n" - (double (/ (count-parens (:program lex-best)) - (count-points (:program lex-best))))) ;Number of (open) parens / points - (println "--- Lexicase Program with Most Zero Cases Statistics ---") - (println "Zero cases best genome:" (print-genome most-zero-cases-best)) - (println "Zero cases best program:" (pr-str (not-lazy (:program most-zero-cases-best)))) - (when (> report-simplifications 0) - (println "Zero cases best partial simplification:" - (pr-str (not-lazy (:program (auto-simplify most-zero-cases-best - error-function - report-simplifications - false - 1000)))))) - (when print-errors (println "Zero cases best errors:" (not-lazy (:errors most-zero-cases-best)))) - (when (and print-errors (not (empty? meta-error-categories))) - (println "Zero cases best meta-errors:" (not-lazy (:meta-errors most-zero-cases-best)))) - (println "Zero cases best number of elite cases:" - (apply + (map #(if (== %1 %2) 1 0) - (:errors most-zero-cases-best) - min-error-by-case))) - (println "Zero cases best number of zero cases:" - (apply + (map #(if (< %1 min-number-magnitude) 1 0) - (:errors most-zero-cases-best)))) - (println "Zero cases best total error:" (:total-error most-zero-cases-best)) - (println "Zero cases best mean error:" - (float (/ (:total-error most-zero-cases-best) - (count (:errors most-zero-cases-best))))) - (when print-history (println "Zero cases best history:" - (not-lazy (:history most-zero-cases-best)))) - (println "Zero cases best size:" (count-points (:program most-zero-cases-best))) - (printf "Percent parens: %.3f\n" - (double (/ (count-parens (:program most-zero-cases-best)) - (count-points (:program most-zero-cases-best))))) ;Number of (open) parens / points - (println "--- Lexicase Population Statistics ---") - (println "Count of elite individuals by case:" count-elites-by-case) - (println (format "Population mean number of elite cases: %.2f" - (float (/ (apply + count-elites-by-case) (count population))))) - (println "Count of perfect (error zero) individuals by case:" count-zero-by-case) - (println (format "Population mean number of perfect (error zero) cases: %.2f" - (float (/ (apply + count-zero-by-case) (count population))))))) - - -(defn implicit-fitness-sharing-report - "This extra report is printed whenever implicit fitness sharing selection is used." - [population {:keys [print-errors meta-error-categories]}] - (let [ifs-best (apply min-key :weighted-error population)] - (println "--- Program with Best Implicit Fitness Sharing Error Statistics ---") - (println "IFS best genome:" (print-genome ifs-best)) - (println "IFS best program:" (pr-str (not-lazy (:program ifs-best)))) - (when print-errors (println "IFS best errors:" (not-lazy (:errors ifs-best)))) - (when (and print-errors (not (empty? meta-error-categories))) - (println "IFS best meta-errors:" (not-lazy (:meta-errors ifs-best)))) - (println "IFS best total error:" (:total-error ifs-best)) - (println "IFS best mean error:" (float (/ (:total-error ifs-best) - (count (:errors ifs-best))))) - (println "IFS best IFS error:" (:weighted-error ifs-best)) - (println "IFS best size:" (count-points (:program ifs-best))) - (printf "IFS best percent parens: %.3f\n" - (double (/ (count-parens (:program ifs-best)) - (count-points (:program ifs-best))))))) ;Number of (open) parens / points - - -(defn report-and-check-for-success - "Reports on the specified generation of a pushgp run. Returns the best - individual of the generation." - [population generation - {:keys [error-function report-simplifications meta-error-categories - error-threshold max-generations population-size - print-errors print-history print-cosmos-data print-timings - problem-specific-report total-error-method - parent-selection print-homology-data max-point-evaluations - print-error-frequencies-by-case normalization autoconstructive - print-selection-counts exit-on-success - ;; The following are for CSV or JSON logs - print-csv-logs print-json-logs csv-log-filename json-log-filename - log-fitnesses-for-all-cases json-log-program-strings - print-edn-logs edn-keys edn-log-filename edn-additional-keys] - - :as argmap}] - (r/generation-data! [:population] - (map #(dissoc % :program) population)) - - (println) - (println ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") - (println ";; -*- Report at generation" generation) - (let [point-evaluations-before-report @point-evaluations-count - err-fn (if (= total-error-method :rmse) :weighted-error :total-error) - sorted (sort-by err-fn < population) - err-fn-best (first sorted) - psr-best (problem-specific-report err-fn-best - population - generation - error-function - report-simplifications) - best (if (= (type psr-best) clojush.individual.individual) - psr-best - err-fn-best) - standard-deviation (fn [nums] - (if (<= (count nums) 1) - (str "Cannot find standard deviation of " - (count nums) - "numbers. Must have at least 2.") - (let [mean (mean nums)] - (Math/sqrt (/ (apply +' (map #(* (- % mean) (- % mean)) - nums)) - (dec (count nums))))))) - quartiles (fn [nums] - (if (zero? (count nums)) - "Cannot find quartiles of zero numbers." - (let [sorted (sort nums)] - (vector (nth sorted - (truncate (/ (count nums) 4))) - (nth sorted - (truncate (/ (count nums) 2))) - (nth sorted - (truncate (/ (* 3 (count nums)) 4)))))))] - - (when print-error-frequencies-by-case - (println "Error frequencies by case:" - (doall (map frequencies (apply map vector (map :errors population)))))) - (when (some #{parent-selection} - #{:lexicase :elitegroup-lexicase :leaky-lexicase :epsilon-lexicase - :random-threshold-lexicase}) - (lexicase-report population argmap)) - (when (= total-error-method :ifs) (implicit-fitness-sharing-report population argmap)) - (println (format "--- Best Program (%s) Statistics ---" (str "based on " (name err-fn)))) - (r/generation-data! [:best :individual] (dissoc best :program)) - (println "Best genome:" (print-genome best)) - (println "Best program:" (pr-str (not-lazy (:program best)))) - (when (> report-simplifications 0) - (println "Partial simplification:" - (pr-str (not-lazy (:program (r/generation-data! [:best :individual-simplified] - (auto-simplify best - error-function - report-simplifications - false - 1000))))))) - (when print-errors (println "Errors:" (not-lazy (:errors best)))) - (when (and print-errors (not (empty? meta-error-categories))) - (println "Meta-Errors:" (not-lazy (:meta-errors best)))) - (println "Total:" (:total-error best)) - (let [mean (r/generation-data! [:best :mean-error] (float (/ (:total-error best) - (count (:errors best)))))] - - (println "Mean:" mean)) - (when (not= normalization :none) - (println "Normalized error:" (:normalized-error best))) - (case total-error-method - :hah (println "HAH-error:" (:weighted-error best)) - :rmse (println "RMS-error:" (:weighted-error best)) - :ifs (println "IFS-error:" (:weighted-error best)) - nil) - (when (= parent-selection :novelty-search) - (println "Novelty: " (float (:novelty best)))) - (when print-history (println "History:" (not-lazy (:history best)))) - (println "Genome size:" (r/generation-data! [:best :genome-size] (count (:genome best)))) - (println "Size:" (r/generation-data! [:best :program-size] (count-points (:program best)))) - (printf "Percent parens: %.3f\n" - (r/generation-data! [:best :percent-parens] - (double (/ (count-parens (:program best)) - (count-points (:program best)))))) ;Number of (open) parens / points - (println "--- Population Statistics ---") - (when print-cosmos-data - (println "Cosmos Data:" (let [quants (config/quantiles (count population))] - (zipmap quants - (map #(:total-error (nth (sort-by :total-error population) %)) - quants))))) - (println "Average total errors in population:" - (r/generation-data! [:population-report :mean-total-error] - (*' 1.0 (mean (map :total-error sorted))))) - (println "Median total errors in population:" - (r/generation-data! [:population-report :median-total-error] - (median (map :total-error sorted)))) - (when print-errors (println "Error averages by case:" - (apply map (fn [& args] (*' 1.0 (mean args))) - (map :errors population)))) - (when print-errors (println "Error minima by case:" - (apply map (fn [& args] (apply min args)) - (map :errors population)))) - (when (and print-errors (not (empty? meta-error-categories))) - (println "Meta-Error averages by category:" - (apply map (fn [& args] (*' 1.0 (mean args))) - (map :meta-errors population))) - (println "Meta-Error minima by category:" - (apply map (fn [& args] (apply min args)) - (map :meta-errors population)))) - (println "Average genome size in population (length):" - (r/generation-data! [:population-report :mean-genome-size] - (*' 1.0 (mean (map count (map :genome sorted)))))) - (println "Average program size in population (points):" - (r/generation-data! [:population-report :mean-program-size] - (*' 1.0 (mean (map count-points (map :program sorted)))))) - (printf "Average percent parens in population: %.3f\n" - (r/generation-data! [:population-report :mean-program-percent-params] - (mean (map #(double (/ (count-parens (:program %)) (count-points (:program %)))) sorted)))) - (let [ages (map :age population)] - (println "Minimum age in population:" - (r/generation-data! [:population-report :min-age] - (* 1.0 (apply min ages)))) - (println "Maximum age in population:" - (r/generation-data! [:population-report :max-age] - (* 1.0 (apply max ages)))) - (println "Average age in population:" - (r/generation-data! [:population-report :mean-age] - (* 1.0 (mean ages)))) - (println "Median age in population:" - (r/generation-data! [:population-report :median-age] - (* 1.0 (median ages))))) - (let [grain-sizes (map :grain-size population)] - (println "Minimum grain-size in population:" - (r/generation-data! [:population-report :min-grain-size] - (* 1.0 (apply min grain-sizes)))) - (println "Maximum grain-size in population:" - (r/generation-data! [:population-report :max-grain-size] - (* 1.0 (apply max grain-sizes)))) - (println "Average grain-size in population:" - (r/generation-data! [:population-report :mean-grain-size] - (* 1.0 (mean grain-sizes)))) - (println "Median grain-size in population:" - (r/generation-data! [:population-report :median-grain-size] - (* 1.0 (median grain-sizes))))) - (println "--- Population Diversity Statistics ---") - (let [genome-frequency-map (frequencies (map :genome population))] - (println "Min copy number of one Plush genome:" - (r/generation-data! [:population-report :min-genome-frequency] - (apply min (vals genome-frequency-map)))) - (println "Median copy number of one Plush genome:" - (r/generation-data! [:population-report :median-genome-frequency] - (median (vals genome-frequency-map)))) - (println "Max copy number of one Plush genome:" - (r/generation-data! [:population-report :max-genome-frequency] - (apply max (vals genome-frequency-map)))) - (println "Genome diversity (% unique Plush genomes):\t" - (r/generation-data! [:population-report :percent-genomes-unique] - (float (/ (count genome-frequency-map) (count population)))))) - (let [frequency-map (frequencies (map :program population))] - (println "Min copy number of one Push program:" - (r/generation-data! [:population-report :min-program-frequency] - (apply min (vals frequency-map)))) - (println "Median copy number of one Push program:" - (r/generation-data! [:population-report :median-program-frequency] - (median (vals frequency-map)))) - (println "Max copy number of one Push program:" - (r/generation-data! [:population-report :max-program-frequency] - (apply max (vals frequency-map)))) - (println "Syntactic diversity (% unique Push programs):\t" - (r/generation-data! [:population-report :percent-programs-unique] - (float (/ (count frequency-map) (count population)))))) - (println "Total error diversity:\t\t\t\t" - (r/generation-data! [:population-report :percent-total-error-unique] - (float (/ (count (distinct (map :total-error population))) (count population))))) - (println "Error (vector) diversity:\t\t\t" - (r/generation-data! [:population-report :percent-errors-unique] - (float (/ (count (distinct (map :errors population))) (count population))))) - (when (not (nil? (:behaviors (first population)))) - (println "Behavioral diversity:\t\t\t\t" (behavioral-diversity population))) - (when print-homology-data - (let [num-samples 1000 - sample-1 (sample-population-edit-distance population num-samples) - [first-quart-1 median-1 third-quart-1] (quartiles sample-1)] - (println "--- Population Homology Statistics (all stats reference the sampled population edit distance of programs) ---") - (println "Number of homology samples:" num-samples) - (println "Average: " (mean sample-1)) - (println "Standard deviation: " (standard-deviation sample-1)) - (println "First quartile: " first-quart-1) - (println "Median: " median-1) - (println "Third quartile: " third-quart-1))) - - (when print-selection-counts - (println "Selection counts:" - (sort > (concat (vals @selection-counts) - (repeat (- population-size (count @selection-counts)) 0)))) - (reset! selection-counts {})) - (when autoconstructive - (println "Number of random replacements for non-diversifying individuals:" - (r/generation-data! [:population-report :number-random-replacements] - (count (filter :is-random-replacement population))))) - (println "--- Run Statistics ---") - (println "Number of program evaluations used so far:" @evaluations-count) - (println "Number of point (instruction) evaluations so far:" point-evaluations-before-report) - (reset! point-evaluations-count point-evaluations-before-report) - (println "--- Timings ---") - (println "Current time:" (System/currentTimeMillis) "milliseconds") - (when print-timings - (let [total-time (apply + (vals @timing-map)) - init (get @timing-map :initialization) - reproduction (get @timing-map :reproduction) - fitness (get @timing-map :fitness) - report-time (get @timing-map :report) - other (get @timing-map :other)] - (printf "Total Time: %8.1f seconds\n" - (/ total-time 1000.0)) - (printf "Initialization: %8.1f seconds, %4.1f%%\n" - (/ init 1000.0) (* 100.0 (/ init total-time))) - (printf "Reproduction: %8.1f seconds, %4.1f%%\n" - (/ reproduction 1000.0) (* 100.0 (/ reproduction total-time))) - (printf "Fitness Testing: %8.1f seconds, %4.1f%%\n" - (/ fitness 1000.0) (* 100.0 (/ fitness total-time))) - (printf "Report: %8.1f seconds, %4.1f%%\n" - (/ report-time 1000.0) (* 100.0 (/ report-time total-time))) - (printf "Other: %8.1f seconds, %4.1f%%\n" - (/ other 1000.0) (* 100.0 (/ other total-time))))) - (println ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") - (println ";; -*- End of report for generation" generation) - (println ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;") - (flush) - (when print-csv-logs (csv-print population generation argmap)) - (when print-json-logs (json-print population generation json-log-filename - log-fitnesses-for-all-cases json-log-program-strings)) - (when print-edn-logs - (edn-print population generation edn-log-filename edn-keys edn-additional-keys)) - (cond (and exit-on-success - (or (<= (:total-error best) error-threshold) - (:success best))) [:success best] - (>= generation max-generations) [:failure best] - (>= @point-evaluations-count max-point-evaluations) [:failure best] - :else [:continue best]))) - -(defn initial-report - "Prints the initial report of a PushGP run." - [{:keys [problem-specific-initial-report] :as push-argmap}] - (problem-specific-initial-report push-argmap) - (println "Registered instructions:" - (r/config-data! [:registered-instructions] @registered-instructions)) - (println "Starting PushGP run.") - (printf "Clojush version = ") - (try - (let [version-str (apply str (butlast (re-find #"\".*\"" - (first (string/split-lines - (local-file/slurp* "project.clj")))))) - version-number (.substring version-str 1 (count version-str))] - (if (empty? version-number) - (throw Exception) - (printf (str (r/config-data! [:version-number] version-number)) "\n"))) - (flush) - (catch Exception e - (printf "version number unavailable\n") - (flush))) - (try - (let [git-hash (git-last-commit-hash)] - (if (empty? git-hash) - (throw Exception) - (do - ;; NOTES: - Last commit hash will only be correct if this code has - ;; been committed already. - ;; - GitHub link will only work if commit has been pushed - ;; to GitHub. - (r/config-data! [:git-hash] git-hash) - (printf (str "Hash of last Git commit = " git-hash "\n")) - (printf (str "GitHub link = https://github.com/lspector/Clojush/commit/" - git-hash - "\n")) - (flush)))) - (catch Exception e - (printf "Hash of last Git commit = unavailable\n") - (printf "GitHub link = unavailable\n") - (flush))) - (if (:print-edn-logs push-argmap) - ;; The edn log is overwritten if it exists - (with-open [w (io/writer (:edn-log-filename push-argmap) :append false)] - (.write w "#clojush/run") - (.write w (prn-str (dissoc push-argmap - ;; These keys have functions - :atom-generators - :error-function - :problem-specific-report - :random-seed)))))) - - -(defn final-report - "Prints the final report of a PushGP run if the run is successful." - [generation best - {:keys [error-function final-report-simplifications report-simplifications - print-ancestors-of-solution problem-specific-report]}] - (printf "\n\nSUCCESS at generation %s\nSuccessful program: %s\nErrors: %s\nTotal error: %s\nHistory: %s\nSize: %s\n\n" - generation (pr-str (not-lazy (:program best))) (not-lazy (:errors best)) (:total-error best) - (not-lazy (:history best)) (count-points (:program best))) - (when print-ancestors-of-solution - (printf "\nAncestors of solution:\n") - (prn (:ancestors best))) - (let [simplified-best (auto-simplify best error-function final-report-simplifications true 500)] - (println "\n;;******************************") - (println ";; Problem-Specific Report of Simplified Solution") - (problem-specific-report simplified-best [] generation error-function report-simplifications))) - diff --git a/src/clojush/structured_logger.clj b/src/clojush/structured_logger.clj new file mode 100644 index 000000000..a65d81384 --- /dev/null +++ b/src/clojush/structured_logger.clj @@ -0,0 +1,57 @@ +;; This should could get extracted out to an external repository eventually. +;; +;; This is designed to support dynamically generating log data only when required. To do this, we use +;; plumbing's Graph and lazy maps. +;; +;; It maintains an atom of already computed data, so that later log events can reference data generated from +;; earlier events. +(ns clojush.structured-logger + (require [plumbing.graph :as graph] + [plumbing.core :refer [fnk map-vals]] + [plumbing.fnk.pfnk :as pfnk])) + +;; https://github.com/plumatic/plumbing/issues/4#issuecomment-13449331 +; (defn merge-inputs [f] +; (pfnk/fn->fnk +; (fn [m] (merge m (f m))) +; [(pfnk/input-schema f) +; (merge (pfnk/output-schema f) +; (plumbing.map/keep-leaves identity (pfnk/input-schema f)))])) + +;; terms: +;; +;; label: is the name of an event we want to log, i.e. :start, :end, etc +;; input: is the stuff you want to log i.e. {:id 1 :start-time 20} +;; compute-graph: is a graph that that takes in the existing mapping of labels->computed +;; along with the `input`, and returns the new `computed` for the event. +;; computed: is the return value of compute-graph +;; handler: map of label to handle function. Each handle function is called with the `labels->computed` +;; that exist when the that event is called. +(def structured-logger-graph + {:label->compute-fn + (fnk [label->compute-graph] + (map-vals graph/lazy-compile label->compute-graph)) + :label->computed + (fnk [] (atom {})) + :log! + (fnk [label->compute-fn label->computed handlers] + (fn [label input] + ;; compute and store + (let [compute-fn (label label->compute-fn) + ;; we wanna pass all the comptued data so far (label->computed) + ;; into the compute function, along with the input, so that + ;; we can access the push argmap in computing some stuff. + + compute-fn-input (-> @label->computed + (dissoc label) + (merge input)) + computed (compute-fn compute-fn-input)] + (swap! label->computed assoc label computed)) + ;; trigger handlers + (doseq [handler handlers + :let [handle-fn (label handler)] + :when (some? handle-fn)] + (handle-fn @label->computed))))}) + +(def ->structured-logger + (graph/compile structured-logger-graph)) diff --git a/test/clojush/test/structured_logger_test.clj b/test/clojush/test/structured_logger_test.clj new file mode 100644 index 000000000..cdce43fb5 --- /dev/null +++ b/test/clojush/test/structured_logger_test.clj @@ -0,0 +1,45 @@ +(ns clojush.test.structured-logger-test + (:require [plumbing.core :refer [fnk]] + [clojure.test :refer :all] + [clojure.walk] + [clojush.structured-logger :refer [->structured-logger]]) + (:import (lazymap.core LazyPersistentMap))) + +(def handler-1-event-a (atom nil)) +(def handler-1-event-b (atom nil)) +(def handler-2-event-b (atom nil)) + +(def structured-logger + (->structured-logger + {:handlers [{:a (partial reset! handler-1-event-a) + :b (partial reset! handler-1-event-b)} + {:b (partial reset! handler-2-event-b)}] + :label->compute-graph + {:a {:hi (fnk [a-input] a-input)} + :b {:there (fnk [b-input] b-input) + :from-a (fnk [a] a)}}})) + +(def log! (:log! structured-logger)) + +(defn into-map [form] + (clojure.walk/prewalk + #(if (instance? LazyPersistentMap %) (into {} %) %) + form)) + +(deftest event-handling-works [] + (log! :a {:a-input "a input"}) + (is (= (into-map @handler-1-event-a) + {:a {:hi "a input"}})) + + (log! :b {:b-input "b input"}) + (is (= (into-map @handler-1-event-b) + (into-map @handler-2-event-b) + {:a {:hi "a input"} + :b {:there "b input" + :from-a {:hi "a input"}}})) + + (log! :a {:a-input "new a input"}) + (is (= (into-map @handler-1-event-a) + {:a {:hi "new a input"} + :b {:there "b input" + :from-a {:hi "a input"}}})))