Skip to content

Commit

Permalink
Merge UI (#263)
Browse files Browse the repository at this point in the history
  • Loading branch information
zaibacu committed Mar 1, 2021
1 parent 32fdf3f commit 137bdc4
Show file tree
Hide file tree
Showing 97 changed files with 3,034 additions and 1,710 deletions.
5 changes: 5 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*/.idea/
*/node_modules/
*/dist/
Dockerfile
Dockerfile.builder
5 changes: 5 additions & 0 deletions api/resources/datomic-schema/2019-10-28-initial-schema.edn
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@
:db/cardinality :db.cardinality/one
:db/doc "Data sample row number"}

{:db/ident :document-plan/data-sample-method
:db/valueType :db.type/string
:db/cardinality :db.cardinality/one
:db/doc "Data sample method"}

{:db/ident :document-plan/update-count
:db/valueType :db.type/long
:db/cardinality :db.cardinality/one
Expand Down
9 changes: 9 additions & 0 deletions api/resources/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ type DocumentPlan {
documentPlan: String!
dataSampleId: ID
dataSampleRow: Int
dataSampleMethod: String
createdAt: Int!
updatedAt: Int
updateCount: Int!
Expand Down Expand Up @@ -247,6 +248,12 @@ type Query {
recordLimit: Int
): DataFile

getRelevantSamples(
id: ID!
method: String!
recordLimit: Int
): DataFile

listDataFiles(
offset: Int
limit: Int
Expand Down Expand Up @@ -355,6 +362,7 @@ type Mutation {
documentPlan: String!
dataSampleId: ID
dataSampleRow: Int
dataSampleMethod: String
): DocumentPlan!

deleteDocumentPlan(
Expand All @@ -370,6 +378,7 @@ type Mutation {
documentPlan: String
dataSampleId: ID
dataSampleRow: Int
dataSampleMethod: String
): DocumentPlan!

createPhrase(
Expand Down
3 changes: 2 additions & 1 deletion api/src/api/config.clj
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
:enabled-readers (set (read-conf-line (or (System/getenv "ENABLED_READERS") "")))
:config-path (or (System/getenv "CONFIG_PATH") (io/resource "config"))
:dictionary-path (or (System/getenv "DICT_PATH") (io/resource "dictionary"))
:document-plan-path (or (System/getenv "DOCUMENT_PLANS") (io/resource "document-plans"))})
:document-plan-path (or (System/getenv "DOCUMENT_PLANS") (io/resource "document-plans"))
:relevant-items-limit (or (System/getenv "RELEVANT_ITEMS_MATRIX_LIMIT") 100)})

(defstate conf :start (load-config))
26 changes: 14 additions & 12 deletions api/src/api/graphql/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,19 @@
(-> "schema.graphql"
(io/resource)
slurp
(parser/parse-schema {:resolvers {:Query {:listDataFiles :list-data-files
:documentPlan :document-plan
:documentPlans :document-plans
:getDataFile :get-data-file
:dictionary :dictionary
:dictionaryItem :dictionary-item
:readerFlags :reader-flags
:languages :languages
:concepts :concepts
:concept :concept
:searchThesaurus :search-thesaurus
:synonyms :synonyms}
(parser/parse-schema {:resolvers {:Query {:listDataFiles :list-data-files
:documentPlan :document-plan
:documentPlans :document-plans
:getRelevantSamples :get-relevant-samples
:getDataFile :get-data-file
:dictionary :dictionary
:dictionaryItem :dictionary-item
:readerFlags :reader-flags
:languages :languages
:concepts :concepts
:concept :concept
:searchThesaurus :search-thesaurus
:synonyms :synonyms}
:Mutation {:createDictionaryItem :create-dictionary-item
:deleteDictionaryItem :delete-dictionary-item
:updateDictionaryItem :update-dictionary-item
Expand Down Expand Up @@ -66,6 +67,7 @@
:concept #'concept-domain/get-concept
:list-data-files #'data-domain/list-data-files
:get-data-file #'data-domain/get-data-file
:get-relevant-samples #'data-domain/get-relevant-samples
:create-data-file #'data-domain/create-data-file})
schema/compile))

Expand Down
9 changes: 9 additions & 0 deletions api/src/api/graphql/domain/data.clj
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@
(resolve-as data-file)
(resolve-as-not-found-file id)))

(defn get-relevant-samples
  "GraphQL resolver: loads records of the data file `id` using the requested
  sampling `method`.

  Supported methods are \"relevant\" (`data-files/fetch-most-relevant`) and
  \"first\" (`data-files/fetch`); `method` defaults to \"relevant\" when the
  argument is absent. Any other value makes `case` throw, because no default
  clause is provided. `recordOffset` defaults to 0 and `recordLimit` to 20.
  Resolves to the fetched data file, or to a not-found result when the fetch
  function returns a falsey value."
  [_ args _]
  (let [{:keys [id method recordOffset recordLimit]
         :or   {recordOffset 0 recordLimit 20 method "relevant"}} args
        fetch-fn (case method
                   "relevant" data-files/fetch-most-relevant
                   "first" data-files/fetch)
        found (fetch-fn id recordOffset recordLimit)]
    (if-let [data-file found]
      (resolve-as data-file)
      (resolve-as-not-found-file id))))

(defn list-data-files
  "GraphQL resolver: lists data files via `data-files/listing`.

  `offset`/`limit` are passed through as the file paging arguments and
  `recordOffset`/`recordLimit` as the record paging arguments. Defaults are
  0/20 and 0/20 respectively when an argument is absent."
  [_ args _]
  (let [{:keys [offset limit recordOffset recordLimit]
         :or   {offset 0 limit 20 recordOffset 0 recordLimit 20}} args
        listing (data-files/listing offset limit recordOffset recordLimit)]
    (resolve-as listing)))
Expand Down
19 changes: 10 additions & 9 deletions api/src/api/graphql/translate/document_plan.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
(:require [api.utils :refer [read-mapper]]
[jsonista.core :as json]))

;; Converts GraphQL document plan arguments into the internal document plan map.
;; Every listed key is copied as-is (absent keys become nil), except
;; :documentPlan, which arrives as a JSON string and is parsed with
;; `json/read-value` using `read-mapper`.
(defn schema->dp [{:keys [id uid name kind blocklyXml documentPlan dataSampleId dataSampleRow]}]
{:id id
:uid uid
:name name
:kind kind
:blocklyXml blocklyXml
:documentPlan (json/read-value documentPlan read-mapper)
:dataSampleId dataSampleId
:dataSampleRow dataSampleRow})
(defn schema->dp
  "Converts GraphQL document plan arguments into the internal document plan
  map.

  All listed keys are copied verbatim (absent keys become nil), except
  `:documentPlan`, which arrives as a JSON string and is parsed with
  `json/read-value` using `read-mapper`."
  [schema]
  (let [{:keys [id uid name kind blocklyXml documentPlan
                dataSampleId dataSampleRow dataSampleMethod]} schema
        parsed-plan (json/read-value documentPlan read-mapper)]
    {:id               id
     :uid              uid
     :name             name
     :kind             kind
     :blocklyXml       blocklyXml
     :documentPlan     parsed-plan
     :dataSampleId     dataSampleId
     :dataSampleRow    dataSampleRow
     :dataSampleMethod dataSampleMethod}))

(defn dp->schema
  "Converts an internal document plan map into its GraphQL shape by
  serializing the value under `:documentPlan` to a JSON string. All other
  keys are left untouched."
  [dp]
  (let [plan-json (json/write-value-as-string (:documentPlan dp))]
    (assoc dp :documentPlan plan-json)))
5 changes: 3 additions & 2 deletions api/src/api/nlg/service.clj
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
::request/documentPlanName
::request/dataId
::request/dataRow
::request/sampleMethod
::request/readerFlagValues]))

(s/def ::generate-request-bulk
Expand All @@ -30,7 +31,7 @@
(Boolean/valueOf (System/getenv "DISPLAY_ERROR")))

(defn generate-request
[{data-id :dataId data-row :dataRow reader-model :readerFlagValues :as request}]
[{data-id :dataId sample-method :sampleMethod data-row :dataRow reader-model :readerFlagValues :as request}]
(try
(log/infof "Generate request with %s" (utils/request->text request))
(let [{row-index :dataSampleRow :as document-plan} (utils/get-document-plan request)
Expand All @@ -39,7 +40,7 @@
:status :pending})
(results/write (generate-text {:id result-id
:document-plan document-plan
:data (or data-row (utils/get-data-row data-id (or row-index 0)) {})
:data (or data-row (utils/get-data-row data-id (or sample-method "first") (or row-index 0)) {})
:reader-model (map reader-model/update! (utils/form-reader-model reader-model))}))
{:status 200
:body {:resultId result-id}})
Expand Down
7 changes: 5 additions & 2 deletions api/src/api/nlg/service/utils.clj
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@
(remove nil?)
(str/join "; ")))

(defn get-data-row [data-id index]
(defn get-data-row [data-id sample-method index]
(log/infof "Sample Method: %s" sample-method)
(when-not (str/blank? data-id)
(if-let [{[{fields :fields}] :records filename :fileName} (data-files/fetch data-id index 1)]
(if-let [{[{fields :fields}] :records filename :fileName} (case sample-method
"relevant" (data-files/fetch-most-relevant data-id index 20)
"first" (data-files/fetch data-id index 1))]
(cond->> (zipmap (map :fieldName fields) (map :value fields))
(data-enrich/enable-enrich?) (data-enrich/enrich filename))
(log/errorf "Data with id `%s` not found" data-id))))
Expand Down
77 changes: 40 additions & 37 deletions api/src/data/datomic/entities/document_plan.clj
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,19 @@
(defn transact-item [conn key data-item]
(let [current-ts (utils/ts-now)]
@(d/transact conn [(remove-nil-vals
{:document-plan/id key
:document-plan/uid (:uid data-item)
:document-plan/data-sample-id (:dataSampleId data-item)
:document-plan/name (:name data-item)
:document-plan/kind (:kind data-item)
:document-plan/examples (:examples data-item)
:document-plan/blockly-xml (:blocklyXml data-item)
:document-plan/document-plan (prepare-document-plan (:documentPlan data-item))
:document-plan/created-at current-ts
:document-plan/updated-at current-ts
:document-plan/data-sample-row (:dataSampleRow data-item)
:document-plan/update-count 0})])
{:document-plan/id key
:document-plan/uid (:uid data-item)
:document-plan/data-sample-id (:dataSampleId data-item)
:document-plan/name (:name data-item)
:document-plan/kind (:kind data-item)
:document-plan/examples (:examples data-item)
:document-plan/blockly-xml (:blocklyXml data-item)
:document-plan/document-plan (prepare-document-plan (:documentPlan data-item))
:document-plan/created-at current-ts
:document-plan/updated-at current-ts
:document-plan/data-sample-row (:dataSampleRow data-item)
:document-plan/data-sample-method (:dataSampleMethod data-item)
:document-plan/update-count 0})])
(assoc data-item
:id key
:createdAt current-ts
Expand Down Expand Up @@ -96,18 +97,19 @@
:dictionaryItem (doc-plan->document-plan (:blockly/dictionary-item document-plan))})))

(defn dp->dp [document-plan]
{:id (:document-plan/id document-plan)
:uid (:document-plan/uid document-plan)
:name (:document-plan/name document-plan)
:kind (:document-plan/kind document-plan)
:examples (:document-plan/examples document-plan)
:blocklyXml (:document-plan/blockly-xml document-plan)
:documentPlan (doc-plan->document-plan (:document-plan/document-plan document-plan))
:createdAt (:document-plan/created-at document-plan)
:updatedAt (:document-plan/updated-at document-plan)
:dataSampleRow (:document-plan/data-sample-row document-plan)
:dataSampleId (:document-plan/data-sample-id document-plan)
:updateCount (:document-plan/update-count document-plan)})
{:id (:document-plan/id document-plan)
:uid (:document-plan/uid document-plan)
:name (:document-plan/name document-plan)
:kind (:document-plan/kind document-plan)
:examples (:document-plan/examples document-plan)
:blocklyXml (:document-plan/blockly-xml document-plan)
:documentPlan (doc-plan->document-plan (:document-plan/document-plan document-plan))
:createdAt (:document-plan/created-at document-plan)
:updatedAt (:document-plan/updated-at document-plan)
:dataSampleRow (:document-plan/data-sample-row document-plan)
:dataSampleId (:document-plan/data-sample-id document-plan)
:dataSampleMethod (:document-plan/data-sample-method document-plan)
:updateCount (:document-plan/update-count document-plan)})

(defn pull-entity [conn key]
(let [document-plan (ffirst (d/q '[:find (pull ?e [*])
Expand All @@ -128,19 +130,20 @@
(let [original (pull-entity conn key)
current-ts (utils/ts-now)]
@(d/transact conn [(remove-nil-vals
{:db/id [:document-plan/id key]
:document-plan/uid (:uid data-item)
:document-plan/data-sample-id (:dataSampleId data-item)
:document-plan/name (:name data-item)
:document-plan/kind (:kind data-item)
:document-plan/examples (:examples data-item)
:document-plan/blockly-xml (:blocklyXml data-item)
:document-plan/document-plan (prepare-document-plan (:documentPlan data-item))
:document-plan/updated-at current-ts
:document-plan/data-sample-row (:dataSampleRow data-item)
:document-plan/update-count (if (some? (:updateCount original))
(inc (:updateCount original))
0)})])
{:db/id [:document-plan/id key]
:document-plan/uid (:uid data-item)
:document-plan/data-sample-id (:dataSampleId data-item)
:document-plan/name (:name data-item)
:document-plan/kind (:kind data-item)
:document-plan/examples (:examples data-item)
:document-plan/blockly-xml (:blocklyXml data-item)
:document-plan/document-plan (prepare-document-plan (:documentPlan data-item))
:document-plan/updated-at current-ts
:document-plan/data-sample-row (:dataSampleRow data-item)
:document-plan/data-sample-method (:dataSampleMethod data-item)
:document-plan/update-count (if (some? (:updateCount original))
(inc (:updateCount original))
0)})])
(pull-entity conn key)))

(defn delete [conn key]
Expand Down
24 changes: 24 additions & 0 deletions api/src/data/entities/data_files.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
[clojure.tools.logging :as log]
[data.db :as db]
[data.utils :as utils]
[data.entities.data-files.row-selection :as row-selection]
[data.spec.data-file :as data-file]
[dk.ative.docjure.spreadsheet :as excel]
[mount.core :refer [defstate]]))
Expand Down Expand Up @@ -85,6 +86,29 @@
:recordLimit limit
:recordCount total}))

(defn fetch-most-relevant
  "Reads data file `id` and returns up to `limit` of its most mutually
  dissimilar rows, skipping the first `offset` selected rows.

  Rows are first thinned with `row-selection/sample` (bounded by the
  `:relevant-items-limit` config value), then ranked with
  `row-selection/distance-matrix` and `row-selection/select-rows`.

  Returns nil when the file cannot be read or parsed, so callers that test
  the result (e.g. with `if-let`) can report a missing file. Otherwise returns
  a map with `:id`, `:fileName`, `:fieldNames`, `:records`, `:recordOffset`,
  `:recordLimit` and `:recordCount`."
  [id offset limit]
  (when-let [{:keys [filename header rows total]} (some-> id (read-data-file) (parse-data))]
    (let [configured-limit (:relevant-items-limit conf)
          ;; The limit may be a String when it was read from an environment
          ;; variable; `sample` compares it numerically.
          sample-limit (if (string? configured-limit)
                         (Long/parseLong configured-limit)
                         configured-limit)
          sampled-rows (row-selection/sample rows sample-limit)
          ;; Row selection needs at least one row to seed the result, so an
          ;; empty file short-circuits to an empty record list.
          selected-rows (if (seq sampled-rows)
                          (drop offset
                                (row-selection/select-rows
                                  (row-selection/distance-matrix sampled-rows)
                                  sampled-rows
                                  limit))
                          [])]
      {:id           id
       :fileName     filename
       :fieldNames   header
       ;; NOTE(review): record ids use the position within the selection, not
       ;; the row's index in the original file — confirm this is intended.
       :records      (map (fn [row record]
                            {:id     (str id ":" row)
                             :fields (map (fn [column field-name value]
                                            {:id        (str id ":" row ":" column)
                                             :fieldName field-name
                                             :value     value})
                                          (range)
                                          header
                                          record)})
                          (range 0 limit)
                          selected-rows)
       :recordOffset 0
       :recordLimit  limit
       :recordCount  total})))

(defn fetch [id offset limit]
(some-> id
(read-data-file)
Expand Down
53 changes: 53 additions & 0 deletions api/src/data/entities/data_files/row_selection.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
(ns data.entities.data-files.row-selection
(:require [clojure.set :as set]
[clojure.tools.logging :as log]
[data.utils :refer [murmur-hash]]))

(defn hash-row
  "Returns a lazy sequence with one hash per cell of `row`. Each cell is
  hashed together with its column position (as the string \"<index>:<value>\")
  via `murmur-hash`, so equal values in different columns hash differently."
  [row]
  (map (fn [position cell]
         (murmur-hash (str position ":" cell)))
       (range)
       row))

(defn jaccard-distance
  "Jaccard distance between two collections treated as sets:
  1 - |A ∩ B| / |A ∪ B|.

  Returns 0 for equal inputs and for inputs that are both empty (e.g. nil vs
  an empty vector), 1 for disjoint inputs, and an exact ratio otherwise."
  [d1 d2]
  (if (= d1 d2)
    0
    (let [k1 (set d1)
          k2 (set d2)
          union-size (count (set/union k1 k2))]
      ;; Two distinct-but-empty inputs have an empty union; treat them as
      ;; identical instead of dividing by zero.
      (if (zero? union-size)
        0
        (- 1 (/ (count (set/intersection k1 k2)) union-size))))))

(defn distance-matrix
  "Builds a nested map of pairwise Jaccard distances between `rows`.

  The result maps each row index to a map of every *other* row index to the
  distance between the two rows' hashes (see `hash-row` and
  `jaccard-distance`). A row never has an entry for itself."
  [rows]
  (let [indexed-hashes (map-indexed vector (map hash-row rows))]
    (into {}
          (for [[i hashes-a] indexed-hashes]
            [i (into {}
                     (for [[j hashes-b] indexed-hashes
                           :when (not= i j)]
                       [j (jaccard-distance hashes-a hashes-b)]))]))))

(defn select-rows
  "Greedily picks up to `limit` rows that are far apart from each other.

  `m` is a distance matrix as produced by `distance-matrix` (row index ->
  {other index -> distance}) and `rows` the rows it was built from. Selection
  starts at row 0; each step moves to the not-yet-selected row farthest from
  the most recently selected one, preferring candidates that are not within
  1/10 of any of the last three selections.

  Returns a sequence of the selected rows in selection order. Returns an
  empty vector when `rows` is empty or `limit` is a number below 1, and stops
  early when `m` offers no further candidates."
  [m rows limit]
  (let [rows-v (vec rows) ;; indexed access and O(1) count, also for lazy input
        row-count (count rows-v)
        too-close? (fn [[_ x]] (< x (/ 1 10)))]
    (if (or (zero? row-count)
            (and (number? limit) (< limit 1)))
      []
      (loop [results [0]
             next 0]
        (let [chosen (set results)
              ;; Rows which are not yet in result
              available-rows (remove (fn [[idx _]] (contains? chosen idx)) (get m next))]
          (if (or (= limit (count results))
                  (= (count results) row-count)
                  ;; Nothing left to pick from: avoids calling max-key with
                  ;; no candidates.
                  (empty? available-rows))
            (do
              (log/debugf "Result incides: %s" results)
              (map (fn [r] (nth rows-v r)) results))
            (let [recent (take-last 3 results)
                  ;; Rows which are far enough from the previous results
                  distant-rows (remove (fn [[idx _]]
                                         (some too-close? (select-keys (get m idx) recent)))
                                       available-rows)
                  k (key (apply max-key val (if (empty? distant-rows)
                                              available-rows
                                              distant-rows)))]
              (recur (conj results k) k))))))))


(defn sample
  "Thins `col` down to at most `limit` evenly strided items.

  When `col` has no more than `limit` items it is returned unchanged.
  Otherwise every ceil(count/limit)-th item is taken, starting with the
  first, and returned as a vector. The result may hold fewer than `limit`
  items when the stride exhausts `col` early; it is never padded with nil.
  A `limit` below 1 yields an empty vector."
  [col limit]
  (let [l (count col)]
    (cond
      (<= l limit) col
      (< limit 1) []
      :else (let [;; integer ceil(l / limit)
                  stride (quot (+ l (dec limit)) limit)]
              (vec (take limit (take-nth stride col)))))))

0 comments on commit 137bdc4

Please sign in to comment.