Skip to content

Commit

Permalink
Implement summary function for DateTime columns
Browse files Browse the repository at this point in the history
  • Loading branch information
ray1729 committed Feb 18, 2012
1 parent 6a24385 commit 2588804
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 21 deletions.
3 changes: 2 additions & 1 deletion modules/incanter-core/project.clj
Expand Up @@ -6,7 +6,8 @@
org.clojure/clojure-contrib]] org.clojure/clojure-contrib]]
[incanter/parallelcolt "0.9.4" [incanter/parallelcolt "0.9.4"
:exclusions [org.clojure/clojure :exclusions [org.clojure/clojure
org.clojure/clojure-contrib]]] org.clojure/clojure-contrib]]
[clj-time "0.3.6"]]
:dev-dependencies [[lein-clojars "0.7.0" :dev-dependencies [[lein-clojars "0.7.0"
:exclusions [org.clojure/clojure :exclusions [org.clojure/clojure
org.clojure/clojure-contrib]]] org.clojure/clojure-contrib]]]
Expand Down
38 changes: 21 additions & 17 deletions modules/incanter-core/src/incanter/stats.clj
Expand Up @@ -37,6 +37,7 @@
(cern.jet.stat.tdouble DoubleDescriptive (cern.jet.stat.tdouble DoubleDescriptive
Probability) Probability)
(incanter Weibull)) (incanter Weibull))
(:require [clj-time.coerce :as ctime])
(:use [clojure.set :only [difference intersection union]]) (:use [clojure.set :only [difference intersection union]])
(:use [incanter.core :only ($ abs plus minus div mult mmult to-list bind-columns (:use [incanter.core :only ($ abs plus minus div mult mmult to-list bind-columns
gamma pow sqrt diag trans regularized-beta ncol gamma pow sqrt diag trans regularized-beta ncol
Expand Down Expand Up @@ -2582,39 +2583,42 @@ Test for different variances between 2 samples
{:col col :min (reduce min (remove nil? ($ col ds))) :max (reduce max (remove nil? ($ col ds))) {:col col :min (reduce min (remove nil? ($ col ds))) :max (reduce max (remove nil? ($ col ds)))
:mean (mean (remove nil? ($ col ds))) :median (median (remove nil? ($ col ds))) :is-numeric true})) :mean (mean (remove nil? ($ col ds))) :median (median (remove nil? ($ col ds))) :is-numeric true}))



(defn category-col-summarizer (defn category-col-summarizer
"Returns a summarizer function which takes a category column and returns a list of the top 5 columns by volume, and a "Returns a summarizer function which takes a category column and returns a list of the top 5 columns by volume, and a
count of remaining rows" count of remaining rows"
([col ds] ([col ds]
(let [freqs (frequencies ($ col ds)) top-5 (take 5 (reverse (sort-by val freqs)))] (let [freqs (frequencies ($ col ds)) top-5 (take 5 (reverse (sort-by val freqs)))]
(into {:col col :count (- (reduce + (map val freqs)) (reduce + (map val (into {} top-5)))) :is-numeric false} top-5)))) (into {:col col :count (- (reduce + (map val freqs)) (reduce + (map val (into {} top-5)))) :is-numeric false} top-5))))


(defn date-col-summarizer
  "Summarizes column col of dataset ds, whose non-nil values are expected to be
  DateTime instances. Nil entries are dropped, the remaining values are converted
  to longs with ctime/to-long, and the result is a map with keys :col, :min, :max,
  :mean, :median (each statistic converted back with ctime/from-long) and
  :is-numeric, which is always false for this summarizer."
  ([col ds]
    (let [timestamps (->> ($ col ds)
                          (remove nil?)
                          (map ctime/to-long))
          ;; mean and median may not return a long, so coerce before converting back
          stat->date (fn [stat] (ctime/from-long (long stat)))
          earliest   (reduce min timestamps)
          latest     (reduce max timestamps)]
      {:col        col
       :min        (ctime/from-long earliest)
       :max        (ctime/from-long latest)
       :mean       (stat->date (mean timestamps))
       :median     (stat->date (median timestamps))
       :is-numeric false})))


(defn choose-singletype-col-summarizer (defn choose-singletype-col-summarizer
"Takes in a type, and returns a suitable column summarizer" "Takes in a type, and returns a suitable column summarizer"
([col-type] ([col-type]
(if (.isAssignableFrom java.lang.Number col-type) (cond
numeric-col-summarizer (.isAssignableFrom java.lang.Number col-type) numeric-col-summarizer
(if (or (.isAssignableFrom java.lang.String col-type) (.isAssignableFrom clojure.lang.Keyword col-type)) (.isAssignableFrom java.lang.String col-type) category-col-summarizer
category-col-summarizer (.isAssignableFrom clojure.lang.Keyword col-type) category-col-summarizer
; FIXME Deal with date columns (.isAssignableFrom org.joda.time.DateTime col-type) date-col-summarizer
(str "Don't know how to summarize a column of type: " col-type) :else (str "Don't know how to summarize a column of type: " col-type))))
))))



(defn summarizer-fn (defn summarizer-fn
"Takes in a column (number or name) and a dataset. Returns a function to summarize the column if summarizable, and a "Takes in a column (number or name) and a dataset. Returns a function to summarize the column if summarizable, and a
string describing why the column can't be summarized in the event that it can't" string describing why the column can't be summarized in the event that it can't"
([col ds] ([col ds]
(let [type-counts (dissoc (count-col-types col ds) nil)] (let [type-counts (dissoc (count-col-types col ds) nil)]
(if (= 1 (count type-counts)) (cond
(choose-singletype-col-summarizer (nth (keys type-counts) 0)) (= 1 (count type-counts)) (choose-singletype-col-summarizer (nth (keys type-counts) 0))
(if (every? #(.isAssignableFrom java.lang.Number %) (keys type-counts)) (every? #(.isAssignableFrom java.lang.Number %) (keys type-counts)) numeric-col-summarizer
numeric-col-summarizer (and (= 2 (count type-counts)) (contains? type-counts java.lang.String) (contains? type-counts clojure.lang.Keyword)) category-col-summarizer
(if (and (= 2 (count type-counts)) (contains? type-counts java.lang.String) (contains? type-counts clojure.lang.Keyword)) (every? #(.isAssignableFrom org.joda.time.DateTime %) (keys type-counts)) date-col-summarizer
category-col-summarizer :else (stat-summarizable type-counts)))))
(stat-summarizable type-counts)))))))


(defn summarizable? (defn summarizable?
"Takes in a column name (or number) and a dataset. Returns true if the column can be summarized, and false otherwise" "Takes in a column name (or number) and a dataset. Returns true if the column can be summarized, and false otherwise"
Expand Down
15 changes: 13 additions & 2 deletions modules/incanter-core/test/incanter/stats_tests.clj
Expand Up @@ -21,7 +21,8 @@


(ns incanter.stats-tests (ns incanter.stats-tests
(:use clojure.test (:use clojure.test
(incanter core stats))) (incanter core stats))
(:require [clj-time.core :as ct]))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; UNIT TESTS FOR incanter.stats.clj ;; UNIT TESTS FOR incanter.stats.clj
Expand Down Expand Up @@ -78,6 +79,8 @@
(def summary-ds8 (to-dataset [["a"] ["b"] ["c"] ["d"] ["b"] ["e"] ["a"] ["b"] ["f"] ["a"] ["b"] ["e"]])) (def summary-ds8 (to-dataset [["a"] ["b"] ["c"] ["d"] ["b"] ["e"] ["a"] ["b"] ["f"] ["a"] ["b"] ["e"]]))
(def summary-ds9 (to-dataset [["a" 1.2] [":b" 3] [:c 0.1] ["d" 8] ["b" 9] ["e" 7.21] ["a" 1E1] ["b" 6.0000] ["f" 1e-2] ["a" 3.0] ["b" 4] ["e" 5]])) (def summary-ds9 (to-dataset [["a" 1.2] [":b" 3] [:c 0.1] ["d" 8] ["b" 9] ["e" 7.21] ["a" 1E1] ["b" 6.0000] ["f" 1e-2] ["a" 3.0] ["b" 4] ["e" 5]]))


;; Single-column dataset of date-times, one per day from 2012-02-01 through 2012-02-09.
(def summary-ds10
  (to-dataset (for [day (range 1 10)]
                (ct/date-time 2012 2 day))))



(deftest mean-test (deftest mean-test
(is (= (map mean (trans test-mat)) [108.0 130.0]))) (is (= (map mean (trans test-mat)) [108.0 130.0])))
Expand Down Expand Up @@ -280,7 +283,15 @@
(is (not (summarizable? 0 summary-ds5))) (is (not (summarizable? 0 summary-ds5)))
(is (not (summarizable? 0 summary-ds6))) (is (not (summarizable? 0 summary-ds6)))
(is (summarizable? 0 summary-ds7)) (is (summarizable? 0 summary-ds7))
) (is (summarizable? 0 summary-ds10)))

(deftest summarize-date-column
  ;; Checks the summary of the first (only) column of summary-ds10, which holds the
  ;; dates 2012-02-01 .. 2012-02-09, so min/max are the endpoints and mean/median
  ;; are both the middle day.
  ;; Each expectation is wrapped in (= ...): the two-argument form (is form msg)
  ;; treats its second argument as a failure message and would only test that
  ;; the first form is truthy.
  (let [s (first (summary summary-ds10))]
    (is (= (ct/date-time 2012 2 1) (:min s)))
    (is (= (ct/date-time 2012 2 9) (:max s)))
    (is (= (ct/date-time 2012 2 5) (:mean s)))
    (is (= (ct/date-time 2012 2 5) (:median s)))
    ;; The summarizer emits the key :is-numeric (no trailing '?'); looking up
    ;; :is-numeric? would yield nil and make the check pass vacuously.
    (is (false? (:is-numeric s)))))


(deftest simple-p-value-test (deftest simple-p-value-test
(testing "Basic p-value testing" (testing "Basic p-value testing"
Expand Down
3 changes: 2 additions & 1 deletion project.clj
Expand Up @@ -28,7 +28,8 @@
[swingrepl "1.3.0" [swingrepl "1.3.0"
:exclusions [org.clojure/clojure :exclusions [org.clojure/clojure
org.clojure/clojure-contrib]] org.clojure/clojure-contrib]]
[jline "0.9.94"]] [jline "0.9.94"]
[clj-time "0.3.6"]]
:dev-dependencies [[lein-clojars "0.7.0" :dev-dependencies [[lein-clojars "0.7.0"
:exclusions [org.clojure/clojure :exclusions [org.clojure/clojure
org.clojure/clojure-contrib]]] org.clojure/clojure-contrib]]]
Expand Down

0 comments on commit 2588804

Please sign in to comment.