Kicking screaming 21st century (#49)
* Added some resources and got the confusion matrix displaying.

* Moved a lot of the web development stuff into suite.

* Large push to make cljs the default way to display information.

* Need to make sure the resources directory exists.

* Figured out why the CSS wasn't live updating.

* Updated .gitignore with common files.

* Last commit before attempting merge
Chris Nuernberger committed Dec 9, 2016
1 parent c479a7c commit 0201405
Showing 15 changed files with 712 additions and 459 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -18,3 +18,8 @@ checkouts
 .lein-*
 .nrepl-port
 models
+figwheel_server.log
+mnist
+resources
+trained-network.nippy
+trained-networks
2 changes: 1 addition & 1 deletion compute/src/think/compute/nn/evaluate.clj
@@ -20,7 +20,7 @@
   (double (opt/evaluate-softmax run-results answer-seq))))


-(defn evaulate-softmax-description
+(defn evaluate-softmax-description
   [net-desc backend-fn dataset input-labels & {:keys [output-index dataset-label-name batch-type batch-size]
                                                :or {output-index 0
                                                     dataset-label-name :labels
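For reference, a hedged sketch of how the renamed function might be invoked (net-desc, make-backend, and dataset are assumed to exist; the defaults output-index 0 and dataset-label-name :labels come from the destructuring shown above):

    ;; Illustrative call only; argument values are hypothetical.
    (evaluate-softmax-description net-desc make-backend dataset [:data]
                                  :batch-type :holdout
                                  :batch-size 10)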
111 changes: 51 additions & 60 deletions compute/src/think/compute/nn/train.clj
@@ -10,6 +10,7 @@
    [think.resource.core :as resource]
    [cortex.nn.protocols :as cp]
    [cortex.nn.description :as desc]
+   [cortex.dataset :as ds]
    [think.compute.nn.description :as compute-desc]
    [clojure.core.matrix :as m]))

@@ -72,7 +72,7 @@
   [{:keys [network batching-system] :as train-config} batch-sequence]
   (when-let [next-batch (first batch-sequence)]
     (let [backend (layers/get-backend network)
-          {:keys [input-buffers output-host-buffers]} next-batch
+          {:keys [input-buffers]} next-batch
           ;;Note no prepare-calc call
           network (cp/multi-calc network input-buffers)
           train-config (assoc train-config :network network)
@@ -92,8 +92,7 @@
                                         n-output-vec buffer-seq))]
       (cons {:train-config train-config
              :inferences (splitter (map #(nn-backend/to-double-array backend %)
-                                        (cp/multi-output network)))
-             :labels (splitter output-host-buffers)}
+                                        (cp/multi-output network)))}
             (lazy-seq (recur-run-config train-config (rest batch-sequence)))))))


@@ -103,34 +103,25 @@ of the network for each"
   [{:keys [network batching-system] :as train-config} batch-type]
   (let [eval-sequence
         (lazy-seq (recur-run-config train-config
-                                    ;;false here because we don't need the result uploaded to the cpu
+                                    ;;false here because we don't need the result
+                                    ;;uploaded to the gpu
                                     (batch/get-batches batching-system batch-type false)))
-        n-outputs (count (cp/multi-output-size network))
-        coalesced-data (reduce (fn [coalesced-data {:keys [train-config inferences labels]}]
-                                 (let [new-labels (when (seq labels)
-                                                    (conj (:labels coalesced-data) (vec labels)))]
-                                   (assoc coalesced-data
-                                          :train-config train-config
-                                          :inferences (conj (:inferences coalesced-data) (vec inferences))
-                                          :labels new-labels)))
-                               {:inferences []
-                                :labels []}
-                               eval-sequence)
-        coalesced->vec (fn [data-seq]
-                         (when (seq data-seq)
-                           (mapv (fn [idx]
-                                   (mapcat #(nth % idx) data-seq))
-                                 (range n-outputs))))]
+        coalesced-data (reduce (fn [coalesced-data {:keys [train-config inferences]}]
+                                 (assoc coalesced-data
+                                        :train-config train-config
+                                        :inferences (conj (:inferences coalesced-data)
+                                                          (vec inferences))))
+                               {:inferences []}
+                               eval-sequence)]
     {:train-config (:train-config coalesced-data)
-     :inferences (coalesced->vec (:inferences coalesced-data))
-     :labels (coalesced->vec (:labels coalesced-data))
-     }))
+     :inferences (ds/batches->columns (:inferences coalesced-data))}))


 (defn evaluate-training-network
   "Run the network and return the average loss across all cv-input"
-  [train-config batch-type]
-  (let [{:keys [train-config inferences labels] :as run-config-output}
+  [labels train-config batch-type]
+  (let [{:keys [train-config inferences] :as run-config-output}
         (run-config train-config batch-type)
         {:keys [loss-fn]} train-config]
     ;;when there were any batches to begin with
@@ -140,8 +131,9 @@


 (defn println-report-epoch
-  [epoch-idx {:keys [batching-system dataset] :as train-config}]
-  (if-let [evaluated-network-data (evaluate-training-network train-config :cross-validation)]
+  [labels epoch-idx {:keys [batching-system dataset] :as train-config}]
+  (if-let [evaluated-network-data (evaluate-training-network labels train-config
+                                                             :cross-validation)]
     (let [{:keys [train-config avg-loss]} evaluated-network-data]
       (println (format "Epoch loss: %s" avg-loss))
       train-config)
@@ -160,9 +152,10 @@ of the network for each"
   [net optimiser dataset input-labels output-labels-and-loss]
   (let [backend (layers/get-backend net)
         batch-size (layers/batch-size net)
-        batching-system (-> (batch/create-dataset-batching-system input-labels (mapv first output-labels-and-loss) batch-size
-                                                                  dataset (drv/get-driver backend) (drv/get-stream backend)
-                                                                  (dtype/get-datatype backend))
+        batching-system (-> (batch/create-dataset-batching-system
+                             input-labels (mapv first output-labels-and-loss) batch-size
+                             dataset (drv/get-driver backend) (drv/get-stream backend)
+                             (dtype/get-datatype backend))
                             batch/setup)
         loss-fns (mapv (fn [[label loss] output-size]
                          (opt/setup-loss loss backend batch-size output-size))
@@ -171,64 +164,62 @@
     {:network net :optimiser optimiser :loss-fn loss-fns :batching-system batching-system}))


+(defn create-train-epoch-sequence
+  "Create an infinite sequence of train-configs where each next config
+is trained one more epoch than the config before it.
+Drops the initial config (which could be completely untrained). Note that
+this could allocate a significant amount of resources so you cannot pass this
+sequence as is around a program."
+  [net optimiser dataset input-labels output-labels-and-loss]
+  (->> (build-train-config net optimiser dataset input-labels output-labels-and-loss)
+       train-epoch-seq
+       (drop 1)))


 (defn train
   "Epoch train filter takes an epoch-index and a train config and produces a new
train config, providing an opportunity for side effects (e.g., printing)."
   [net optimiser dataset input-labels output-labels-and-loss epoch-count
    & {:keys [epoch-train-filter]
-       :or {epoch-train-filter println-report-epoch}}]
+       :or {epoch-train-filter :unset}}]
   (resource/with-resource-context
     (let [epoch-filter (if epoch-train-filter
-                         epoch-train-filter
-                         (fn [idx train-cfg] train-cfg))]
-      (->> (build-train-config net optimiser dataset input-labels output-labels-and-loss)
-           train-epoch-seq
-           (drop 1)
+                         (if (= epoch-train-filter :unset)
+                           (let [output-labels (mapv first output-labels-and-loss)
+                                 dataset-labels (ds/batches->columns
+                                                 (ds/get-batches dataset (layers/batch-size net)
+                                                                 :cross-validation output-labels))]
+                             (partial println-report-epoch dataset-labels))
+                           epoch-train-filter)
+                         (fn [idx train-config]
+                           train-config))]
+      (->> (create-train-epoch-sequence net optimiser dataset input-labels
+                                        output-labels-and-loss)
           (map-indexed epoch-filter)
          (take epoch-count)
           last
           :network))))


 (defn train-description
   "Same as train but takes and returns a description instead of a live network.
Also takes a function that produces a network backend. This avoids leaking gpu
resources to the user."
   [net-desc backend-fn optimiser dataset input-labels output-labels-and-loss epoch-count batch-size
    & {:keys [epoch-train-filter]
       :or {epoch-train-filter println-report-epoch}}]
   (resource/with-resource-context
     (let [network (compute-desc/build-and-create-network net-desc (backend-fn) batch-size)]
       (-> (train network optimiser dataset input-labels output-labels-and-loss epoch-count)
           desc/network->description))))


 (defn infer-network
   [net dataset input-labels output-labels & {:keys [batch-type]
                                              :or {batch-type :holdout}}]
   (resource/with-resource-context
     (let [backend (layers/get-backend net)
           batch-size (layers/batch-size net)
-          batching-system (-> (batch/create-dataset-batching-system input-labels output-labels batch-size
-                                                                    dataset (drv/get-driver backend) (drv/get-stream backend)
+          batching-system (-> (batch/create-dataset-batching-system input-labels output-labels
+                                                                    batch-size dataset
+                                                                    (drv/get-driver backend)
+                                                                    (drv/get-stream backend)
                                                                     (dtype/get-datatype backend))
                               batch/setup)]
       (select-keys (run-config {:network net :batching-system batching-system} batch-type)
                    [:inferences :labels]))))


 (defn run
-  "Run a network products a vector of output sequences, one sequence for each output of the network."
+  "Running a network produces a vector of output sequences, one sequence for
+each output of the network."
   [net dataset input-labels & {:keys [batch-type]
                                :or {batch-type :holdout}}]
   (:inferences (infer-network net dataset input-labels [] :batch-type batch-type)))


 (defn run-description
   "Run a network from a description, producing a vector of output sequences, one sequence for each
output of the network."
   [net-desc backend-fn dataset input-labels batch-size & {:keys [batch-type]
                                                           :or {batch-type :holdout}}]
   (resource/with-resource-context
     (let [network (compute-desc/build-and-create-network net-desc (backend-fn) batch-size)]
       (run network dataset input-labels :batch-type batch-type))))
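Given the new :unset default above, a hedged usage sketch (net, optimiser, dataset, and loss are assumed to already be constructed; the label keywords are illustrative):

    ;; Default behaviour: cross-validation labels are fetched once via
    ;; ds/batches->columns and println-report-epoch prints the loss per epoch.
    (train net optimiser dataset [:data] [[:labels loss]] 10)

    ;; Custom filter: receives the epoch index and a train-config and must
    ;; return a train-config; passing nil disables per-epoch reporting.
    (train net optimiser dataset [:data] [[:labels loss]] 10
           :epoch-train-filter (fn [epoch-idx train-config]
                                 (println "finished epoch" epoch-idx)
                                 train-config))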
64 changes: 49 additions & 15 deletions cortex/src/cortex/dataset.clj
@@ -124,12 +124,59 @@ or a more complex shape definition a layout, num-channels, width and height")
   and I should get back a potentially lazy sequence of batches, each batch has a
   vector of items
   ([(image image image)(label label label)(hist hist hist)]
-   [(image image image)(label label label)(hist hist hist)])"
+   [(image image image)(label label label)(hist hist hist)])
+  Put another way, within each batch the data is columnar in the order requested
+  by shape-name-seq."
   ))


+(defn batches->columns
+  "Given a batch sequence from get-batches
+transform it so that it is a vector of columnar data,
+one column for each item requested from the batch."
+  [batch-sequence]
+  (when (and (not (empty? batch-sequence))
+             (not (empty? (first batch-sequence))))
+    (mapv (fn [idx]
+            (mapcat #(nth % idx) batch-sequence))
+          (range (count (first batch-sequence))))))


+(defn get-data-sequence-from-dataset
+  "Get a sequence of data from the dataset. Takes a batch size because
+datasets always give data in batches. Note that if you are taking the
+evaluation results from a network with a given batch size you should call
+this function with the same batch type (probably holdout) and batch-size
+as what you used in the run call."
+  [dataset name batch-type batch-size]
+  (->> (get-batches dataset batch-size batch-type [name])
+       batches->columns
+       first))


+(defn- recur-column-data->column-groups
+  [name-seq-seq column-data]
+  (when-let [next-name-seq (first name-seq-seq)]
+    (cons (vec (take (count next-name-seq) column-data))
+          (lazy-seq (recur-column-data->column-groups
+                     (rest name-seq-seq)
+                     (drop (count next-name-seq) column-data))))))


-;;Data shape map is a map of name-> {:data [large randomly addressable sequence of data] :shape (integer or image shape)}
+(defn batch-sequence->column-groups
+  "Given a sequence of sequences of names to pull from the dataset,
+return a sequence of columnar data vectors in the same order as the
+name sequences"
+  [dataset batch-size batch-type name-seq-seq]
+  (->> (flatten name-seq-seq)
+       (get-batches dataset batch-size batch-type)
+       batches->columns
+       (recur-column-data->column-groups name-seq-seq)))
+
+
+;;Data shape map is a map of name->
+;;{:data [large randomly addressable sequence of data] :shape (integer or image shape)}
;;Index sets are either a map of batch-type->index sequence *or* just a sequence of indexes
(defrecord InMemoryDataset [data-shape-map index-sets]
PDataset
@@ -193,19 +240,6 @@
(->TakeNDataset dataset max-sample-count-or-limit-map))


-(defn get-data-sequence-from-dataset
-  "Get a sequence of data from the dataset. Takes a batch size because
-datasets always give data in batches. Note that if you are taking the
-evaluation results from a network with a given batch size you should call
-this function with the same batch type (probably holdout) and batch-size
-as what you used in the run call."
-  [dataset name batch-type batch-size]
-  (let [batch-data (get-batches dataset batch-size batch-type [name])]
-    (mapcat first batch-data)))
-
-
-

(defrecord InfiniteDataset [shape-map cv-seq-fn holdout-seq-fn
training-seq-fn
sequence->map-fn
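To make the new columnar helpers concrete, a small sketch with toy data (the keywords stand in for real images and labels):

    ;; Two batches, each a vector of per-name columns in request order
    ;; (here: images, then labels).
    (batches->columns [[[:i1 :i2] [:l1 :l2]]
                       [[:i3 :i4] [:l3 :l4]]])
    ;;=> [(:i1 :i2 :i3 :i4) (:l1 :l2 :l3 :l4)]

    ;; batch-sequence->column-groups then regroups those columns to mirror a
    ;; nested name request, e.g. [[:images :labels] [:hist]] would yield
    ;; ([images-col labels-col] [hist-col]).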
@@ -0,0 +1,12 @@
+(ns suite-classification.classify
+  (:require [think.gate.core :as gate]
+            [cortex.suite.classify :as classify]))
+
+
+(defmethod gate/component "default"
+  [& args]
+  (apply classify/classify-component args))
+
+
+
+(gate/start-frontend)
30 changes: 28 additions & 2 deletions examples/suite-classification/project.clj
@@ -5,7 +5,33 @@
             :url "http://www.eclipse.org/legal/epl-v10.html"}
   :dependencies [[org.clojure/clojure "1.8.0"]
                  [thinktopic/cortex.suite "0.3.1-SNAPSHOT"]
-                 ;;If you have cuda-8.0 installed then add this:
-                 [thinktopic/gpu-compute "0.3.1-SNAPSHOT"]]
+                 ;;Default way of displaying anything is a web page.
+                 ;;Because if you want to train on aws (which you should)
+                 ;;you need to get simple servers up and running easily.
+                 [thinktopic/think.gate "0.1.1"]
+                 ;;If you need cuda 8...
+                 ;;[org.bytedeco.javacpp-presets/cuda "8.0-1.2"]
+                 ]
+
+  :figwheel {:css-dirs ["resources/public/css"]}
+
+
+  :source-paths ["src"]
+
+  :clean-targets ^{:protect false} ["pom.xml"
+                                    "target"
+                                    "resources/public/out"
+                                    "resources/public/js/app.js"
+                                    "figwheel_server.log"]
+
+  :cljsbuild {:builds
+              [{:id "dev"
+                :figwheel true
+                :source-paths ["cljs"]
+                :compiler {:main "suite-classification.classify"
+                           :asset-path "out"
+                           :output-to "resources/public/js/app.js"
+                           :output-dir "resources/public/out"}}]}
+
   :main suite-classification.main
   :aot [suite-classification.main])
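With this configuration the usual Figwheel workflow should apply: running "lein figwheel dev" compiles the "dev" cljsbuild with live cljs reloading and, per :css-dirs, live-reloads stylesheets from resources/public/css, which matches the CSS live-update fix mentioned in the commit message. The lein-figwheel plugin itself is not declared in this snippet, so it presumably comes from somewhere else, such as a user-level profile.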
Empty file.
4 changes: 4 additions & 0 deletions examples/suite-classification/src/css/styles.clj
@@ -0,0 +1,4 @@
+(ns css.styles
+  (:require [cortex.suite.css-styles :as suite-styles]))
+
+(def styles suite-styles/styles)