Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 78 lines (63 sloc) 2.985 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77

module Main where

import Data.Char
import Data.Int
import Data.List
import Data.Array
import qualified Data.ByteString as L
import qualified Data.ByteString.Char8 as C
import Text.Regex.TDFA

-- | Read the cleaned SQL dump, split it on CRLF, rewrite the timestamp
-- literals of every line with 'parseLine2', terminate every line with
-- @";\n"@ and write the concatenated result to the output file.
--
-- The @let@ bindings are lazy, so the progress messages mark the program
-- order of the steps rather than the completion of the work they name.
main :: IO ()
main = do
  --contents <- L.readFile "../data/dbo.AgentActivityLog.sql.clean.unix"
  contents <- L.readFile "../data/dbo.CallDetail.clean"
  -- Terminator appended to every output line: a semicolon followed by LF.
  let terminator = C.pack ";\n"

  putStrLn "before splitCrlf"
  let myLines = splitCrlf [] contents
  putStrLn "after splitCrlf"
  let myParsedLines = map parseLine2 myLines
  putStrLn "after map parseLine2"

  let myParsedEndedLines = map (`L.append` terminator) myParsedLines
  putStrLn "after map snoc"

  let result = L.concat myParsedEndedLines
  putStrLn "after concat"
  --L.writeFile "../data/dbo.AgentActivityLog.sql.clean.unix.dated" result
  L.writeFile "../data/dbo.CallDetail.clean.dated.sql" result
  
--parseFile inFile outFile =
-- hIn = mkFileHandle "../data/dbo.CallDetail.clean" ReadMode

-- | Split a buffer on CRLF separators.
--
-- The pieces of @line@ are returned after the elements of @acc@, in order.
-- Separators are not included in the pieces. A buffer without any CRLF
-- yields a single piece (the buffer itself), and a buffer ending with a CRLF
-- yields a final empty piece.
--
-- The result is produced with @(:)@ instead of repeatedly appending to the
-- accumulator, so the cost is linear in the number of pieces.
splitCrlf :: [L.ByteString] -> L.ByteString -> [L.ByteString]
splitCrlf acc line = acc ++ pieces line
  where
    pieces rest
      | pos == -1 = [rest]
      -- @L.take 0@ is empty, so a separator at offset 0 needs no special case.
      | otherwise = L.take pos rest : pieces (L.drop (pos + 2) rest)
      where
        pos = searchCrlf rest
          
               
-- | Run 'searchCrlf_' on the whole buffer with a starting counter of 0 and
-- return its result unchanged.
searchCrlf :: L.ByteString -> Int
searchCrlf = searchCrlf_ 0

-- | Find the first CR byte that is immediately followed by an LF byte.
--
-- @cpt@ is the number of bytes already skipped in front of @line@; the
-- returned offset is @cpt@ plus the position of the CR inside @line@, so a
-- call with @cpt = 0@ yields an offset into @line@ itself.
-- Returns -1 when @line@ contains no such pair.
searchCrlf_ :: Int -> L.ByteString -> Int
searchCrlf_ cpt line =
  case L.elemIndex cr line of
    Nothing -> -1
    Just pos
      -- A CR in last position has no follower: no pair, and no byte to index.
      | pos + 1 >= L.length line -> -1
      | L.index line (pos + 1) == lf -> cpt + pos
      -- Lone CR: resume strictly after it so the search always makes progress.
      | otherwise -> searchCrlf_ (cpt + pos + 1) (L.drop (pos + 1) line)
  where
    cr = fromIntegral (ord '\r')
    lf = fromIntegral (ord '\n')

-- | Rewrite every quoted timestamp literal of a line.
--
-- A literal has the shape @'20110629 05:00:25:000'@. Each match is handed to
-- 'replacePatternInLine' as an (offset, length) pair.
parseLine2 :: L.ByteString -> L.ByteString
parseLine2 line = foldr rewriteAt line hits
  where
    timestampPattern = "'[0-9]{8} ([0-9]{2}:){3}[0-9]{3}'"
    hits = getAllMatches (line =~ timestampPattern :: AllMatches [] (Int, Int))
    -- 'foldr' applies the right-most match first. Every replacement changes
    -- the length of the text after its offset, so working from the end keeps
    -- the offsets of the matches still to be processed valid.
    rewriteAt hit current = fst (replacePatternInLine current hit)
  
-- | Reformat the timestamp literal that starts at the given offset.
--
-- The offset points at the opening quote of a literal shaped like
-- @'20110629 05:00:25:000'@. Dashes are inserted after the year and after the
-- month, and the four bytes @:000@ before the closing quote are dropped,
-- giving @'2011-06-29 05:00:25'@. The (offset, length) pair is returned
-- unchanged next to the new line; the length is not used.
replacePatternInLine :: L.ByteString -> (Int, Int) -> (L.ByteString, (Int, Int))
replacePatternInLine line offLen@(start, _) =
  (L.concat [upToYear, dash, month, dash, dayAndTime, closing], offLen)
  where
    dash = C.singleton '-'
    -- Remainder of the line starting @n@ bytes after the opening quote.
    from n = L.drop (start + n) line
    upToYear = L.take (start + 5) line -- everything through the 4-digit year
    month = L.take 2 (from 5)
    dayAndTime = L.take 11 (from 7) -- "DD HH:MM:SS"
    closing = from 22 -- closing quote and the rest of the line
  
Something went wrong with that request. Please try again.