Skip to content
Browse files

New UDF to bucket numeric data into one of a set of possible values

  • Loading branch information...
1 parent ab5f9dd commit 60a129cbba37f1d151d3e82b74749e78dd94478f karthik k committed
Showing with 28 additions and 1 deletion.
  1. +5 −1 project.clj
  2. +23 −0 src/runa/udfs/bucket_numbers.clj
View
6 project.clj
@@ -5,4 +5,8 @@
[hive/hive-exec "0.5.0"]
[org.apache.hadoop/hadoop-core "0.20.2-dev"]]
:main runa.udfs.time-of-day
- :aot [runa.udfs.day-of-week runa.udfs.time-of-day])
+ :aot [runa.udfs.day-of-week
+ runa.udfs.time-of-day
+ runa.udfs.first-event
+ runa.udfs.last-event
+ runa.udfs.bucket-numbers])
View
23 src/runa/udfs/bucket_numbers.clj
@@ -0,0 +1,23 @@
+(ns runa.udfs.bucket-numbers
+ (:import [org.apache.hadoop.hive.ql.exec UDF])
+ (:import [org.apache.hadoop.io Text])
+ (:import [java.text SimpleDateFormat])
+ (:import [java.util Date])
+ (:require [clojure.string :as sr])
+ (:gen-class
+ :name runa.udfs.Bucket
+ :extends org.apache.hadoop.hive.ql.exec.UDF
+ :methods [[evaluate [org.apache.hadoop.io.Text org.apache.hadoop.io.Text] org.apache.hadoop.io.Text]]))
+
+(defn get-bucket
+  "Returns the largest bucket boundary that is less than or equal to `value`.
+
+  `buckets` is a comma-separated string of integer boundaries, e.g. \"0,10,100\".
+  Whitespace around entries and empty entries are ignored.
+  `value` is the string form of the number to classify.
+
+  Throws NumberFormatException when `value` or a boundary is not numeric, and
+  IllegalArgumentException when no boundary is <= `value`."
+  [buckets, value]
+  (let [;; Parse as a double: a float cannot represent integers above 2^24
+        ;; exactly, which would push large values into the wrong bucket.
+        v        (Double/parseDouble value)
+        ;; Trim each entry so lists written as \"10, 20\" parse correctly.
+        bounds   (->> (sr/split buckets #",")
+                      (map sr/trim)
+                      (remove sr/blank?)
+                      (map #(Long/parseLong %)))
+        eligible (filter #(>= v %) bounds)]
+    (if (seq eligible)
+      (apply max eligible)
+      ;; Fail with a descriptive error instead of calling `max` with no arguments.
+      (throw (IllegalArgumentException.
+               (str "no bucket in \"" buckets "\" is <= " value))))))
+
+(defn #^Text -evaluate
+  "Hive UDF entry point: labels the numeric text `s` with its bucket.
+
+  `bkt` carries the comma-separated bucket boundaries handed to `get-bucket`.
+  Returns nil when `s` is nil; otherwise a Text of the form \" > N\", where N
+  is the boundary chosen by `get-bucket`. If bucketing throws any Exception
+  the result is an empty Text."
+  [this #^Text s #^Text bkt]
+  (when s
+    (let [label (try
+                  (str " > " (get-bucket (str bkt) (str s)))
+                  ;; Best effort: any failure to bucket yields an empty label.
+                  (catch Exception _ ""))]
+      (Text. label))))

0 comments on commit 60a129c

Please sign in to comment.
Something went wrong with that request. Please try again.