Skip to content

Commit

Permalink
Add module for request processing and close #24
Browse files Browse the repository at this point in the history
  • Loading branch information
kawu committed Nov 23, 2013
1 parent f902078 commit 2c900fe
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 114 deletions.
1 change: 1 addition & 0 deletions concraft-pl.cabal
Expand Up @@ -42,6 +42,7 @@ library
NLP.Concraft.Polish
, NLP.Concraft.Polish.Maca
, NLP.Concraft.Polish.Morphosyntax
, NLP.Concraft.Polish.Request
, NLP.Concraft.Polish.Server

other-modules:
Expand Down
62 changes: 8 additions & 54 deletions src/NLP/Concraft/Polish.hs
Expand Up @@ -10,14 +10,11 @@ module NLP.Concraft.Polish
, C.loadModel

-- * Tagging
-- ** Plain
, tag
, tag'
, tagSent
-- ** Probabilities
, marginals
, marginals'
, marginalsSent

-- * Analysis
, macaPar

-- * Training
, TrainConf (..)
Expand All @@ -32,13 +29,10 @@ module NLP.Concraft.Polish
) where


import qualified Control.Monad.LazyIO as LazyIO
import Control.Applicative ((<$>))
import qualified Data.List.Split as Split
import qualified Data.Char as Char
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Set as S

import qualified Data.Tagset.Positional as P
import qualified Numeric.SGD as SGD

Expand Down Expand Up @@ -96,26 +90,9 @@ tiersDefault =
-------------------------------------------------


-- | Perform morphological tagging on the input text.
tag :: MacaPool -> C.Concraft -> T.Text -> IO [Sent Tag]
tag pool concraft inp = map (tagSent concraft) <$> macaPar pool inp


-- | An alernative to `tag` which interprets empty lines as
-- paragraph ending markers. The function uses lazy IO so it
-- can be used to analyse large chunks of data.
tag' :: MacaPool -> C.Concraft -> L.Text -> IO [[Sent Tag]]
tag' pool concraft
= LazyIO.mapM (tag pool concraft . L.toStrict)
. map L.unlines
. Split.splitWhen
(L.all Char.isSpace)
. L.lines


-- | Tag the analysed sentence.
tagSent :: C.Concraft -> Sent Tag -> Sent Tag
tagSent concraft sent =
tag :: C.Concraft -> Sent Tag -> Sent Tag
tag concraft sent =
[ select' gs t seg
| (seg, gs, t) <- zip3 sent gss ts ]
where
Expand All @@ -127,32 +104,9 @@ tagSent concraft sent =
showTag = P.showTag tagset


-------------------------------------------------
-- Tagging with probabilities
-------------------------------------------------


-- | Tag the input text with morphosyntactic tags and corresponding
-- marginal probabilities.
marginals :: MacaPool -> C.Concraft -> T.Text -> IO [Sent Tag]
marginals pool concraft inp = map (marginalsSent concraft) <$> macaPar pool inp


-- | An alernative to `marginals` which interprets empty lines as
-- paragraph ending markers. The function uses lazy IO so it
-- can be used to analyse large chunks of data.
marginals' :: MacaPool -> C.Concraft -> L.Text -> IO [[Sent Tag]]
marginals' pool concraft
= LazyIO.mapM (marginals pool concraft . L.toStrict)
. map L.unlines
. Split.splitWhen
(L.all Char.isSpace)
. L.lines


-- | Tag the sentence with marginal probabilities.
marginalsSent :: C.Concraft -> Sent Tag -> Sent Tag
marginalsSent concraft sent
marginals :: C.Concraft -> Sent Tag -> Sent Tag
marginals concraft sent
= map (uncurry selectWMap)
$ zip wmaps sent
where
Expand Down
116 changes: 116 additions & 0 deletions src/NLP/Concraft/Polish/Request.hs
@@ -0,0 +1,116 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}


module NLP.Concraft.Polish.Request
(
-- * Request
Request (..)
, Config (..)
-- ** Short
, Short (..)
, short
-- ** Long
, Long (..)
, long
) where


import Control.Applicative ((<$>), (<*>))
import qualified Control.Monad.LazyIO as LazyIO
import qualified Data.Char as Char
import qualified Data.List.Split as Split
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Binary as B

import NLP.Concraft.Polish
import NLP.Concraft.Polish.Maca
import NLP.Concraft.Polish.Morphosyntax hiding (tag)


-------------------------------------------------
-- Configuration
-------------------------------------------------


-- | A request with configuration.
data Request t = Request {
-- | The actuall request.
rqBody :: t
-- | Request configuration.
, rqConf :: Config }


instance B.Binary t => B.Binary (Request t) where
put Request{..} = B.put rqBody >> B.put rqConf
get = Request <$> B.get <*> B.get


-- | Tagging configuration.
newtype Config = Config {
-- | Tag with marginal probabilities.
tagProbs :: Bool
} deriving (B.Binary)


-------------------------------------------------
-- Short request
-------------------------------------------------


-- | A short request.
data Short
= Short T.Text
| Par [Sent Tag]


instance B.Binary Short where
put (Short x) = B.putWord8 0 >> B.put x
put (Par x) = B.putWord8 1 >> B.put x
get = B.getWord8 >>= \x -> case x of
0 -> Short <$> B.get
_ -> Par <$> B.get


-- | Process the short request.
short :: MacaPool -> Concraft -> Request Short -> IO [Sent Tag]
short pool concraft Request{..} = case rqBody of
Short x -> map (tagit concraft) <$> macaPar pool x
Par x -> return $ map (tagit concraft) x
where
tagit = if tagProbs rqConf then marginals else tag


-------------------------------------------------
-- Long request
-------------------------------------------------


-- | A request to parse a long text.
data Long
= Long L.Text
| Doc [[Sent Tag]]


instance B.Binary Long where
put (Long x) = B.putWord8 0 >> B.put x
put (Doc x) = B.putWord8 1 >> B.put x
get = B.getWord8 >>= \x -> case x of
0 -> Long <$> B.get
_ -> Doc <$> B.get


-- | Process the long request given the processor for the
-- short request.
long :: (Request Short -> IO a) -> Request Long -> IO [a]
long handler Request{..} = case rqBody of
Long inp ->
LazyIO.mapM f . map L.unlines
. Split.splitWhen (L.all Char.isSpace)
. L.lines $ inp
Doc inp -> LazyIO.mapM g inp
where
f x = handler . r $ Short $ L.toStrict x
g x = handler . r $ Par x
r x = Request {rqBody = x, rqConf = rqConf}
38 changes: 9 additions & 29 deletions src/NLP/Concraft/Polish/Server.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}


module NLP.Concraft.Polish.Server
Expand All @@ -7,27 +8,22 @@ module NLP.Concraft.Polish.Server
runConcraftServer

-- * Client
, tag
, tag'
, submit
) where


import Control.Applicative ((<$>))
import Control.Monad (forever, void)
import Control.Concurrent (forkIO)
import System.IO (Handle, hFlush)
import qualified Control.Monad.LazyIO as LazyIO
import qualified Network as N
import qualified Data.Char as Char
import qualified Data.List.Split as Split
import qualified Data.Binary as B
import qualified Data.ByteString.Lazy as BS
import qualified Data.Text as T
import qualified Data.Text.Lazy as L

import NLP.Concraft.Polish.Morphosyntax hiding (tag)
import NLP.Concraft.Polish.Maca
import qualified NLP.Concraft.Polish as C
import qualified NLP.Concraft.Polish.Request as R


-------------------------------------------------
Expand All @@ -42,6 +38,7 @@ runConcraftServer pool concraft port = N.withSocketsDo $ do
forever $ sockHandler pool concraft sock


-- | Read and process short requests from the socket.
sockHandler :: MacaPool -> C.Concraft -> N.Socket -> IO ()
sockHandler pool concraft sock = do
(handle, _, _) <- N.accept sock
Expand All @@ -50,7 +47,7 @@ sockHandler pool concraft sock = do
-- putStrLn "Waiting for input..."
inp <- recvMsg handle
-- putStr "> " >> T.putStrLn inp
out <- C.tag pool concraft inp
out <- R.short pool concraft inp
-- putStr "No. of sentences: " >> print (length out)
sendMsg handle out

Expand All @@ -60,35 +57,18 @@ sockHandler pool concraft sock = do
-------------------------------------------------


-- | Perform morphological tagging on the input text.
tag :: N.HostName -> N.PortID -> T.Text -> IO [Sent Tag]
tag host port inp = do
-- | Submit the given request.
submit :: N.HostName -> N.PortID -> R.Request R.Short -> IO [Sent Tag]
submit host port inp = do
handle <- N.connectTo host port
-- putStrLn "Connection established"
-- putStr "Send request: " >> T.putStrLn inp
sendMsg handle inp
recvMsg handle


-- | An alernative tagging function which interprets
-- empty lines as paragraph ending markers.
-- The function uses lazy IO so it can be used to
-- analyse large chunks of data.
tag' :: N.HostName -> N.PortID -> L.Text -> IO [[Sent Tag]]
tag' host port
= LazyIO.mapM (tag host port . L.toStrict)
. map L.unlines
. Split.splitWhen
(L.all Char.isSpace)
. L.lines


-- -- | Like `tag` but assignes probabilities.
-- probs' ::


-------------------------------------------------
-- Messages
-- Communication
-------------------------------------------------


Expand Down

0 comments on commit 2c900fe

Please sign in to comment.