Skip to content

Commit

Permalink
Add date reformatting to data enrichment standard functions (#325)
Browse files Browse the repository at this point in the history
  • Loading branch information
Žygimantas Medelis committed Jun 30, 2021
1 parent 627db74 commit 5a145f0
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 24 deletions.
15 changes: 14 additions & 1 deletion api/src/api/nlg/enrich/data/transformations.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
(ns api.nlg.enrich.data.transformations
(:require [clojure.string :as str]
[clojure.tools.logging :as log]
[numberwords.core :as nw]))
[numberwords.core :as nw])
(:import java.time.format.DateTimeFormatter))

(defn string->num
  "Parse the numeric string `n`.
  When `n` contains a dot it is read with `Float/valueOf`, otherwise with
  `Integer/valueOf`; whatever those parsers throw on bad input propagates."
  [^String n]
  (let [decimal? (some? (re-find #"[.]" n))]
    (if decimal?
      (Float/valueOf n)
      (Integer/valueOf n))))
Expand Down Expand Up @@ -40,3 +41,15 @@
(defn cleanup
  "Return `s` with every match of `:regex` substituted by `:replacement`,
  delegating to clojure.string/replace."
  [s {:keys [regex replacement]}]
  (-> s
      (str/replace regex replacement)))

(defn reformat-date
  "Re-render `date` (a string written in `:input-format`) using
  `:output-format`. Both formats are java.time DateTimeFormatter patterns.

  Returns the reformatted string. On any failure - unparsable or nil date,
  malformed or missing pattern, or an output pattern asking for a field the
  parsed value does not carry - the error is logged with its cause and an
  empty string is returned."
  [date {:keys [input-format output-format]}]
  (try
    ;; Formatters are built inside the try so that a bad pattern degrades to ""
    ;; exactly like a bad date does, instead of throwing out of the function.
    (let [in-formatter  (DateTimeFormatter/ofPattern input-format)
          out-formatter (DateTimeFormatter/ofPattern output-format)]
      (.format out-formatter (.parse in-formatter date)))
    (catch Exception e
      ;; Pass the throwable first so the logger records the underlying cause.
      (log/errorf e "Date '%s' can't be reformatted from '%s' to '%s'"
                  date input-format output-format)
      "")))
16 changes: 8 additions & 8 deletions api/src/data/spec.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
[data.spec.reader-model :as reader-model]
[data.spec.result :as result]))

(s/def ::result (s/keys ::req [::result/id ::result/status]
::opt [::result/rows ::result/error-message]))
(s/def ::result (s/keys :req [::result/id ::result/status]
:opt [::result/rows ::result/error-message]))

(s/def ::results (s/coll-of ::result))

(s/def ::reader-model (s/coll-of (s/keys ::req [::reader-model/code
::reader-model/name
::reader-model/type
::reader-model/enabled?]
::opt [::reader-model/flag
::reader-model/available?])))
(s/def ::reader-model (s/coll-of (s/keys :req [::reader-model/code
::reader-model/name
::reader-model/type
::reader-model/enabled?]
:opt [::reader-model/flag
::reader-model/available?])))
4 changes: 2 additions & 2 deletions api/src/data/spec/result.clj
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

(s/def ::timestamp number?)

(s/def ::row (s/keys ::req [::row/id ::row/text ::row/language]
::opt [::row/annotations ::row/enriched? ::row/readers]))
(s/def ::row (s/keys :req [::row/id ::row/text ::row/language]
:opt [::row/annotations ::row/enriched? ::row/readers]))

(s/def ::rows (s/coll-of ::row))
2 changes: 1 addition & 1 deletion api/src/data/spec/result/row.clj
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@

(s/def ::enriched? boolean?)

(s/def ::annotation (s/keys ::req [::annotation/id ::annotation/idx ::annotation/text]))
(s/def ::annotation (s/keys :req [::annotation/id ::annotation/idx ::annotation/text]))

(s/def ::annotations (s/coll-of ::annotation))
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(ns data.data-enrichment-test
(ns api.nlg.enrich.data.data-enrichment-test
(:require [clojure.test :refer [deftest is]]
[api.nlg.enrich.data :refer [enrich read-rules]]))

Expand All @@ -18,6 +18,11 @@
:relation :numberwords.domain/around}}
{:function :api.nlg.enrich.data.transformations/add-symbol
:args {:symbol " USD" :position :back}}]}
{:name-pattern #"Date"
:transformations
[{:function :api.nlg.enrich.data.transformations/reformat-date
:args {:input-format "YYYY-MM-dd HH:mm"
:output-format "YYYY-MM-dd"}}]}
{:name-pattern #"Increase"
:transformations
[{:function :api.nlg.enrich.data.transformations/add-symbol
Expand All @@ -27,22 +32,25 @@
[{"Account" "Gross Sales (ID1220)"
"CurrentPeriod (Q2)" "90447"
"PriorPeriod (Q1)" "82018"
"Increase" "8429"}
"Increase" "8429"
"Date" "2021-05-03 14:56"}
{"Account" "Advertising (ID3011)"
"CurrentPeriod (Q2)" "1280"
"PriorPeriod (Q1)" "1982"
"Increase" "-702"}])
"Increase" "-702"
"Date" "- BAD -"}])

(deftest date-enrichment
(with-redefs [read-rules (fn [] enrich-config)]
(is (= {"Account" "Gross Sales",
"CurrentPeriod (Q2)" "around 90k USD",
"PriorPeriod (Q1)" "around 82k USD",
"Increase" "$8429"}
(is (= {"Account" "Gross Sales"
"CurrentPeriod (Q2)" "around 90k USD"
"PriorPeriod (Q1)" "around 82k USD"
"Increase" "$8429"
"Date" "2021-05-03"}
(enrich "accounts.csv" (first accounts-data))))
(is (= {"Account" "Advertising"
(is (= {"Account" "Advertising"
"CurrentPeriod (Q2)" "around 1k USD"
"PriorPeriod (Q1)" "around 1k USD"
"Increase" "-$702"}
"PriorPeriod (Q1)" "around 1k USD"
"Increase" "-$702"
"Date" ""}
(enrich "accounts.csv" (second accounts-data))))))

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(ns data.transformations-test
(ns api.nlg.enrich.data.transformations-test
(:require [clojure.test :refer [deftest is]]
[api.nlg.enrich.data.transformations :as sut]))

Expand Down
1 change: 1 addition & 0 deletions docs/data-enrichment.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,4 @@ Accelerated text provides a few transformation functions in its `api.nlg.enrich.
* `number-approximation` - Using [Number Words](https://github.com/tokenmill/numberwords) package turn a number to its numeric expression
* `add-symbol` - Add extra symbol to the front or the back of the value. Useful to add measurements or currency symbols
* `cleanup` - Cleanup the string using clojure.string/replace
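* `reformat-date` - Change the date formatting. If your data is in _yyyy-MM-dd_ and you want to go to _yyyy/dd/MM_, use this function and specify the corresponding formats in the `input-format` and `output-format` arguments. Formats use Java `DateTimeFormatter` pattern letters; note that lowercase `yyyy` is the calendar year, while uppercase `YYYY` means week-based year.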

0 comments on commit 5a145f0

Please sign in to comment.