# Clojupyter + oz example

This notebook demonstrates basic usage of oz from a Jupyter notebook using the Clojupyter kernel.

Note that the visualizations below may not render if you're looking at this on GitHub, but will render if you load the notebook into a running Jupyter instance.

In [1]:
;; Kernel setup: load the Clojupyter helper namespaces, turn on stack-trace
;; printing, and pull the tech.ml dependency into the running kernel.
(require '[clojupyter.misc.helper :as helper])
;; Require the stacktrace namespace explicitly instead of relying on the kernel
;; having loaded it already; the call below is fully qualified and would fail
;; on a namespace that was never loaded.
(require 'clojupyter.misc.stacktrace)
(clojupyter.misc.stacktrace/set-print-stacktraces! true)
(helper/add-dependencies '[techascent/tech.ml "1.0-alpha3"])

{[org.apache.commons/commons-math3 "3.6.1"] nil, [com.github.fommil.netlib/native_ref-java "1.1"] #{[com.github.fommil/jniloader "1.1"]}, [org.scala-lang/scala-reflect "2.11.12"] nil, [com.tdunning/t-digest "3.2"] nil, [com.taoensso/timbre "4.10.0"] nil, [org.iq80.snappy/snappy "0.4"] nil, [org.checkerframework/checker-qual "2.0.0"] nil, [org.clojure/tools.reader "1.3.2"] nil, [us.bpsm/edn-java "0.4.7"] nil, [net.sourceforge.f2j/arpack_combined_all "0.1"] nil, [techascent/tech.parallel "1.3"] nil, [camel-snake-kebab "0.4.0"] nil, [org.clojure/tools.macro "0.1.5"] nil, [org.clojure/clojure "1.10.1-beta2"] nil, [org.slf4j/slf4j-api "1.7.25"] nil, [com.github.haifengl/smile-data "1.5.2"] nil, [org.objenesis/objenesis "1.2"] nil, [com.taoensso/truss "1.5.0"] nil, [clj-commons/fs "1.5.0"] nil, [com.univocity/univocity-parsers "2.7.5"] nil, [tech.tablesaw/tablesaw-core "0.30.2"] #{[com.univocity/univocity-parsers "2.7.5"] [org.roaringbitmap/RoaringBitmap "0.7.14"] [org.jsoup/jsoup "1.11.3"] 

In [2]:
(helper/add-dependencies '[metasoarous/oz "1.6.0-alpha2"])
(require '[oz.notebook.clojupyter :as oz])

(require '[tech.libs.smile.utils :as smile-utils])

(require '[tech.ml.dataset.pipeline
           ;;We use col a lot, and int map is similar
           :refer [col]
           :as dsp])
(require '[tech.ml.dataset.pipeline.column-filters :as cf])
(require '[tech.v2.datatype :as dtype])
(require '[tech.v2.datatype.functional :as dfn])
(require '[tech.ml.dataset :as ds])
(require '[tech.ml.dataset.column :as ds-col])
(require '[tech.ml :as ml])
(require '[tech.ml.loss :as loss])
(require '[tech.ml.utils :as ml-utils])
(require '[tech.ml.regression :as ml-regression])
(require '[tech.ml.visualization.vega :as vega-viz])
(require '[clojure.core.matrix :as m])

;;use tablesaw as dataset backing store
(require '[tech.libs.tablesaw :as tablesaw])

;;model generators
(require '[tech.libs.xgboost])
(require '[tech.libs.smile.regression])

;;put/get nippy
(require '[tech.io :as io])
(require '[clojure.pprint :as pp])
(require '[clojure.set :as c-set])

nil

In [3]:
(import '[java.io File])


(defn pp-str
  "Returns the pretty-printed (clojure.pprint) representation of `ds` as a
  string instead of writing it to *out*."
  [ds]
  (let [buffer (java.io.StringWriter.)]
    ;; pprint accepts an explicit writer, so *out* does not need rebinding.
    (pp/pprint ds buffer)
    (str buffer)))


(defn print-table
  "Prints `data` (a sequence of maps) as a text table via
  `clojure.pprint/print-table`, rendering every floating-point value with
  three decimal places.

  With `ks`, only those keys are shown, in the given order. Without `ks`,
  the columns are the sorted keys of the first row."
  ([ks data]
   (letfn [(fmt-value [v]
             ;; float? is true for both Float and Double; every other value
             ;; is passed through untouched.
             (if (float? v)
               (format "%.3f" v)
               v))
           (fmt-row [row]
             (into {} (map (fn [[k v]] [k (fmt-value v)])) row))]
     (pp/print-table ks (map fmt-row data))))
  ([data]
   (let [first-row (first data)
         columns   (sort (keys first-row))]
     (print-table columns data))))

#'user/print-table

In [7]:
;; Download the Ames house-prices training CSV once and keep the raw text in
;; `tmp-data`, then write that same text to train.csv in the working directory.
;; Reusing `tmp-data` avoids fetching the file over the network a second time
;; and guarantees the var and the file hold identical content.
(def tmp-data (slurp "https://raw.githubusercontent.com/kphaser/ames-house-prices/master/data/train.csv"))
(spit "train.csv" tmp-data)

nil

In [9]:
;; Load the CSV written to train.csv as a tablesaw-backed dataset.
(def src-dataset
  (tablesaw/path->tablesaw-dataset "train.csv"))

;; Print the dataset's core.matrix shape as a quick sanity check.
(-> src-dataset
    m/shape
    println)

[81 1460]


nil

In [10]:
;; Scatter plot of SalePrice against GrLivArea, rendered through oz.
(let [price-and-area (-> src-dataset
                         (ds/select ["SalePrice" "GrLivArea"] :all)
                         (ds/->flyweight))]
  (oz/view!
    [:vega-lite
     {:data     {:values price-and-area}
      :mark     :point
      :encoding {:y {:field "SalePrice"
                     :type  :quantitative}
                 :x {:field "GrLivArea"
                     :type  :quantitative}}}]))

In [33]:
(defn play-data
  "Generates toy time-series rows for the given item names.

  For every name, in order, yields 20 maps (`:time` 0 through 19) of the form
  {:time t :item name :quantity q}, where q is (t * name-length)^0.8 plus a
  random integer in [0, name-length). The result is lazy and, because of the
  random term, differs between calls."
  [& names]
  (mapcat (fn [item-name]
            (let [len (count item-name)]
              (map (fn [t]
                     {:time     t
                      :item     item-name
                      :quantity (+ (Math/pow (* t len) 0.8)
                                   (rand-int len))})
                   (range 20))))
          names))
    
;; Vega-Lite spec for a stacked bar chart: one bar per time step, stacked and
;; coloured by item, with bar height equal to the summed quantity.
;; Every field encoding declares its :type explicitly so the chart does not
;; depend on the renderer's defaults for untyped fields: :time is a small set
;; of discrete steps (ordinal) and :item is a category name (nominal).
(def stacked-bar
  {:data {:values (play-data "munchkin" "witch" "dog" "lion" "tiger" "bear")}
   :mark "bar"
   :encoding {:x {:field "time"
                  :type "ordinal"}
              :y {:aggregate "sum"
                  :field "quantity"
                  :type "quantitative"}
              :color {:field "item"
                      :type "nominal"}}})
(oz/view! stacked-bar)

In [34]:

              
;; Build a Vega-Lite scatter-plot spec over the remote cars.json data
;; (Horsepower vs. Miles_per_Gallon, coloured by Origin), then render it.
(def spec
  (let [cars-url "https://gist.githubusercontent.com/metasoarous/4e6f781d353322a44b9cd3e4597c532c/raw/cd633d9bb8e0bed4a5b8e66a32b9569ca2147989/cars.json"]
    {:data     {:url cars-url}
     :mark     "point"
     :encoding {:x     {:field "Horsepower"
                        :type  "quantitative"}
                :y     {:field "Miles_per_Gallon"
                        :type  "quantitative"}
                :color {:field "Origin"
                        :type  "nominal"}}}))

(oz/view! spec)

In [35]:
;; Hiccup document: a heading and a paragraph followed by the two charts
;; defined above, laid out side by side in a flex row.
(-> [:div
     [:h1 "A little hiccup example"]
     [:p "Try drinking a glass of water with your head upside down"]
     [:div {:style {:display        "flex"
                    :flex-direction "row"}}
      [:vega-lite spec]
      [:vega-lite stacked-bar]]]
    oz/view!)

In [36]:
;; Hand-written Vega spec (marks and scales spelled out) for a 200x200
;; scatter plot of the cars.json data: Horsepower on x, Miles_per_Gallon on y,
;; stroke colour by Origin. The local helpers only factor out the repeated
;; sub-maps; the resulting value is one plain nested map.
(def vg-spec
  (let [source    "source_0"
        ;; {:data .. :field ..} map used as a scale domain
        domain-of (fn [field] {:data source, :field field})
        ;; {:scale .. :field ..} map used as a mark encoding channel
        scaled    (fn [scale field] {:scale scale, :field field})]
    {:style  "cell"
     :width  200
     :height 200
     :data   [{:name   source
               :url    "https://gist.githubusercontent.com/metasoarous/4e6f781d353322a44b9cd3e4597c532c/raw/cd633d9bb8e0bed4a5b8e66a32b9569ca2147989/cars.json"
               :format {:type "json"}}]
     :marks  [{:name   "marks"
               :type   "symbol"
               :style  ["point"]
               :from   {:data source}
               :encode {:update {:stroke (scaled "color" "Origin")
                                 :x      (scaled "x" "Horsepower")
                                 :y      (scaled "y" "Miles_per_Gallon")}}}]
     :scales [{:name   "x"
               :type   "linear"
               :domain (domain-of "Horsepower")
               :range  [0 {:signal "width"}]}
              {:name   "y"
               :type   "linear"
               :domain (domain-of "Miles_per_Gallon")
               :range  [{:signal "height"} 0]}
              {:name   "color"
               :type   "ordinal"
               :domain (domain-of "Origin")
               :range  "category"}]}))

(oz/view! vg-spec)

In [10]:
;; NOTE(review): looks like a leftover scratch definition — nothing else in the
;; visible notebook calls it; confirm before removing or renaming.
(defn shit
  "Identity function: returns its argument `x` unchanged."
  [x]
  x)

#'user/shit