Permalink
Browse files

First working version. Parse all lines in all files and index them in Solr.
  • Loading branch information...
1 parent 1a33c8b commit d6d7d54b7bbf09a10850a14a7ad19e0f4e3753c0 @julosaure committed Aug 16, 2012
Showing with 46 additions and 9 deletions.
  1. +46 −9 src/SduDataImporter/core.clj
@@ -1,30 +1,67 @@
(ns SduDataImporter.core
(:require clojure.java.io)
(:require clojure-solr)
- (:require clojure.string))
+ (:require clojure.string)
+ (:import [java.io File]))
+
+;; GLOBALS
(def columnSeparator #"\|") ; regex for a literal pipe; lineToMap splits every input line on it
+(def firstColName "Activity Row Id") ; first-column text that indexLine compares against to skip a row (presumably the header row -- confirm against the data files)
+
(def columnNames ["activity row id" "created date" "primary contact id" "contact integration id" "case" "member" "parish data" "start time" "call type" "type" "inin id" "parish" "interaction time" "resolution" "notes" "division" "program" "task status" "contact external unique id" "subject" "description" "contact type" "caller id" "crmit inin id" "validation status" "worker office number"]) ; 26 keys that lineToMap pairs positionally with the split fields of a line
+(def connection (clojure-solr/connect "http://127.0.0.1:8983/solr")) ; clojure-solr/connect is called when this namespace is loaded; the URL is hard-coded
+
+(def patDateTime #"(\d{2})/(\d{2})/(\d{4}) (\d{2}:\d{2}:\d{2})") ; used with re-matches by changeDateFormat: groups 1-3 are the three date fields, group 4 is HH:MM:SS
+
+(def dirToProcess "/Users/julien/Documents/Output/") ; hard-coded input directory that -main passes to parseDir
+
+(def patFilesToProcess #"LAActivity_AIMA_Part_(.*)\.txt") ; parseDir applies this with re-matches, so the whole file name has to match
+
+;; FUNCTIONS
+
+;; Rewrite an "aa/bb/yyyy HH:MM:SS" timestamp as "yyyy-bb-aaTHH:MM:SSZ".
+;; Returns nil when `date` is not a string or does not match patDateTime in
+;; full, so a missing column (nil) no longer makes re-matches throw a
+;; NullPointerException.
+;; NOTE(review): the first field is emitted as the day and the second as the
+;; month, and "Z" is appended without any time-zone conversion -- confirm that
+;; the exported files really are day-first and already in UTC.
+(defn changeDateFormat [date]
+  (when (string? date)
+    ;; re-matches yields [whole-match group1 group2 group3 group4] or nil.
+    (when-let [[_ day month year time] (re-matches patDateTime date)]
+      (str year "-" month "-" day "T" time "Z"))))
+
+;; Turn one pipe-separated input line into a map keyed by columnNames, with the
+;; "created date" and "start time" values rewritten by changeDateFormat.
+;; Fields are paired with columnNames by position, so a line with fewer fields
+;; than columnNames simply lacks the trailing keys.
(defn lineToMap [line]
- (zipmap columnNames (clojure.string/split line columnSeparator)))
-
+  (let [fields (zipmap columnNames (clojure.string/split line columnSeparator))
+        ;; Named dateCols rather than `keys`, which shadowed clojure.core/keys.
+        dateCols ["created date" "start time"]]
+    (reduce (fn [m k]
+              ;; Convert a date column only when the line actually has it: on a
+              ;; blank or short line the lookup is nil, and handing nil to
+              ;; changeDateFormat used to abort the import with an exception.
+              (if-let [raw (m k)]
+                (assoc m k (changeDateFormat raw))
+                m))
+            fields
+            dateCols)))
+
+;; Parse one input line with lineToMap and add it to Solr as a document, unless
+;; its "activity row id" value equals firstColName, in which case the line is
+;; skipped and nothing is sent.
(defn indexLine [line]
- (println (str (lineToMap line)))
- )
+  (let [doc (lineToMap line)
+        skip? (= firstColName (get doc "activity row id"))]
+    (when-not skip?
+      (clojure-solr/with-connection connection
+        (clojure-solr/add-document! doc)))))
+
+;; Index every line of one input file with indexLine, then issue a single Solr
+;; commit for the whole file.
+;; `fileName` may be a java.io.File or a path string: the reader accepts both,
+;; and the name printed below is taken after coercing with clojure.java.io/file
+;; (calling .getName directly only worked for File arguments).
(defn parseFile [fileName]
+  (println (str "Reading file " (.getName (clojure.java.io/file fileName))))
(with-open [rdr (clojure.java.io/reader fileName)]
(doseq [line (line-seq rdr)]
- (indexLine line))))
+      (indexLine line)))
+  ;; Runs after with-open has closed the reader: one commit per file rather
+  ;; than one per line.
+  (clojure-solr/with-connection connection
+    (clojure-solr/commit!)))
+;; Run parseFile on every regular file directly inside `dirName` whose name
+;; matches patFilesToProcess in full. Sub-directories are not descended into.
+(defn parseDir [dirName]
+  (println (str "Reading directory " dirName))
+  (let [entries (.listFiles (File. dirName))]
+    ;; File.listFiles returns nil (not an empty array) when the path is not a
+    ;; directory or cannot be read; say so instead of silently doing nothing.
+    (when (nil? entries)
+      (println (str "Cannot list directory " dirName)))
+    (doseq [f entries]
+      ;; .isFile keeps a directory that happens to match the name pattern from
+      ;; being handed to parseFile, where opening a reader on it would throw.
+      (when (and (.isFile f)
+                 (re-matches patFilesToProcess (.getName f)))
+        (parseFile f)))))
+;; Entry point: index every matching file of a directory into Solr.
+;; The directory is the first command-line argument when one is given and
+;; dirToProcess otherwise, so calling -main with no arguments behaves exactly
+;; as before.
(defn -main
- [fileName]
- (println (str "We will read " fileName))
- (parseFile fileName))
+  [& args]
+  (let [dir (or (first args) dirToProcess)]
+    (println (str "We will read directory " dir))
+    (parseDir dir)))

0 comments on commit d6d7d54

Please sign in to comment.