Skip to content

Commit

Permalink
Initial stab at more involved fix for jgm#1607
Browse files Browse the repository at this point in the history
This patch attempts to build a style name -> style id mapping based on styles.xml from reference doc, and changes pStyle and rStyle to accept style name as a parameter instead of styleId. There is a fallback mechanic that removes spaces from style name and returns it as style id, but it likely won't help much.

Style names are matched lower-case, since headings and `footnote text` have lowercase names.
  • Loading branch information
lierdakil committed Feb 21, 2015
1 parent a7c67c8 commit 5cdd117
Showing 1 changed file with 86 additions and 64 deletions.
150 changes: 86 additions & 64 deletions src/Text/Pandoc/Writers/Docx.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to docx.
-}
module Text.Pandoc.Writers.Docx ( writeDocx ) where
import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix )
import Data.List ( intercalate, isPrefixOf, isSuffixOf )
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
Expand Down Expand Up @@ -64,7 +64,7 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType)
import Control.Applicative ((<$>), (<|>), (<*>))
import Data.Maybe (fromMaybe, mapMaybe)
import Data.Char (isDigit)
import Data.Char (toLower)

data ListMarker = NoMarker
| BulletMarker
Expand All @@ -90,6 +90,9 @@ listMarkerToId (NumberMarker sty delim n) =
OneParen -> '2'
TwoParens -> '3'

newtype ParaStyleMap = ParaStyleMap (M.Map String String) deriving Show
newtype CharStyleMap = CharStyleMap (M.Map String String) deriving Show

data WriterState = WriterState{
stTextProperties :: [Element]
, stParaProperties :: [Element]
Expand All @@ -106,7 +109,8 @@ data WriterState = WriterState{
, stChangesAuthor :: String
, stChangesDate :: String
, stPrintWidth :: Integer
, stHeadingStyles :: [(Int,String)]
, stParaStyles :: ParaStyleMap
, stCharStyles :: CharStyleMap
, stFirstPara :: Bool
}

Expand All @@ -127,7 +131,8 @@ defaultWriterState = WriterState{
, stChangesAuthor = "unknown"
, stChangesDate = "1969-12-31T19:00:00Z"
, stPrintWidth = 1
, stHeadingStyles = []
, stParaStyles = ParaStyleMap M.empty
, stCharStyles = CharStyleMap M.empty
, stFirstPara = False
}

Expand Down Expand Up @@ -218,29 +223,25 @@ writeDocx opts doc@(Pandoc meta _) = do
let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) .
filter ((==Just "xmlns") . qPrefix . attrKey) .
elAttribs $ styledoc
let headingStyles =
let
mywURI = lookup "w" styleNamespaces
myName name = QName name mywURI (Just "w")
getAttrStyleId = findAttr (myName "styleId")
getNameVal = findChild (myName "name") >=> findAttr (myName "val")
getNum s | not $ null s, all isDigit s = Just (read s :: Int)
| otherwise = Nothing
getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum
getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum
toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId
toMap getF = mapMaybe (toTuple getF) $
findChildren (myName "style") styledoc
select a b | not $ null a = a
| otherwise = b
in
select (toMap getEngHeader) (toMap getIntHeader)
mywURI = lookup "w" styleNamespaces
myName name = QName name mywURI (Just "w")
getAttrStyleId = findAttr (myName "styleId")
getAttrType = findAttr (myName "type")
isParaStyle = (Just "paragraph" ==) . getAttrType
isCharStyle = (Just "character" ==) . getAttrType
getNameVal = findChild (myName "name") >=> findAttr (myName "val") >=> return . map toLower
genStyleItem f e | f e = liftM2 (,) <$> getNameVal <*> getAttrStyleId $ e
| otherwise = Nothing
genStyleMap f = M.fromList $ mapMaybe (genStyleItem f) $ findChildren (myName "style") styledoc
paraStyles = ParaStyleMap $ genStyleMap isParaStyle
charStyles = CharStyleMap $ genStyleMap isCharStyle

((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username
, stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
, stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
, stHeadingStyles = headingStyles}
, stParaStyles = paraStyles
, stCharStyles = charStyles}
let epochtime = floor $ utcTimeToPOSIXSeconds utctime
let imgs = M.elems $ stImages st

Expand Down Expand Up @@ -602,14 +603,14 @@ writeOpenXML opts (Pandoc meta blocks) = do
Just (MetaBlocks [Para xs]) -> xs
Just (MetaInlines xs) -> xs
_ -> []
title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $
title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
authors <- withParaPropM (pStyleM "Author") $ blocksToOpenXML opts $
map Para auths
date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
abstract <- if null abstract'
then return []
else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract'
else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
Expand All @@ -623,11 +624,24 @@ writeOpenXML opts (Pandoc meta blocks) = do
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls

pStyle :: String -> Element
pStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
getStyleId :: String -> M.Map String String -> String
getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s)

pStyle :: String -> ParaStyleMap -> Element
pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] ()
where
sty' = getStyleId sty m

pStyleM :: String -> WS XML.Element
pStyleM = flip fmap (gets stParaStyles) . pStyle

rStyle :: String -> CharStyleMap -> Element
rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] ()
where
sty' = getStyleId sty m

rStyle :: String -> Element
rStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
rStyleM :: String -> WS XML.Element
rStyleM = flip fmap (gets stCharStyles) . rStyle

getUniqueId :: MonadIO m => m String
-- the + 20 is to ensure that there are no clashes with the rIds
Expand All @@ -641,13 +655,12 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do
let (hs, bs') = span isHeaderBlock bs
header <- blocksToOpenXML opts hs
-- We put the Bibliography style on paragraphs after the header
rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs'
rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs'
return (header ++ rest)
blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
blockToOpenXML opts (Header lev (ident,_,_) lst) = do
setFirstPara
headingStyles <- gets stHeadingStyles
paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $
paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $
getParaProps False
contents <- inlinesToOpenXML opts lst
usedIdents <- gets stSectionIds
Expand All @@ -660,26 +673,27 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do
,("w:name",bookmarkName)] ()
let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact")
blockToOpenXML opts (Plain lst) = withParaPropM (pStyleM "Compact")
$ blockToOpenXML opts (Para lst)
-- title beginning with fig: indicates that the image is a figure
blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do
setFirstPara
paraProps <- getParaProps False
contents <- inlinesToOpenXML opts [Image alt (src,tit)]
captionNode <- withParaProp (pStyle "ImageCaption")
captionNode <- withParaPropM (pStyleM "Image Caption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
-- fixDisplayMath sometimes produces a Para [] as artifact
blockToOpenXML _ (Para []) = return []
blockToOpenXML opts (Para lst) = do
isFirstPara <- gets stFirstPara
isFirstPara <- gets stFirstPara
paraProps <- getParaProps $ case lst of
[Math DisplayMath _] -> True
_ -> False
pSM <- gets stParaStyles
let paraProps' = case paraProps of
[] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "FirstParagraph")]]
[] -> [mknode "w:pPr" [] [(pStyle "BodyText")]]
[] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "First Paragraph" pSM)]]
[] -> [mknode "w:pPr" [] [(pStyle "Body Text" pSM)]]
ps -> ps
modify $ \s -> s { stFirstPara = False }
contents <- inlinesToOpenXML opts lst
Expand All @@ -688,11 +702,11 @@ blockToOpenXML _ (RawBlock format str)
| format == Format "openxml" = return [ x | Elem x <- parseXML str ]
| otherwise = return []
blockToOpenXML opts (BlockQuote blocks) = do
p <- withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
p <- withParaPropM (pStyleM "Block Quote") $ blocksToOpenXML opts blocks
setFirstPara
return p
blockToOpenXML opts (CodeBlock attrs str) = do
p <- withParaProp (pStyle "SourceCode") $ (blockToOpenXML opts $ Para [Code attrs str])
p <- withParaPropM (pStyleM "Source Code") (blockToOpenXML opts $ Para [Code attrs str])
setFirstPara
return p
blockToOpenXML _ HorizontalRule = do
Expand All @@ -707,7 +721,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
let captionStr = stringify caption
caption' <- if null caption
then return []
else withParaProp (pStyle "TableCaption")
else withParaPropM (pStyleM "Table Caption")
$ blockToOpenXML opts (Para caption)
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
Expand Down Expand Up @@ -767,9 +781,9 @@ blockToOpenXML opts (DefinitionList items) = do

definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
definitionListItemToOpenXML opts (term,defs) = do
term' <- withParaProp (pStyle "DefinitionTerm")
term' <- withParaPropM (pStyleM "Definition Term")
$ blockToOpenXML opts (Para term)
defs' <- withParaProp (pStyle "Definition")
defs' <- withParaPropM (pStyleM "Definition")
$ concat `fmap` mapM (blocksToOpenXML opts) defs
return $ term' ++ defs'

Expand Down Expand Up @@ -833,6 +847,9 @@ withTextProp d p = do
popTextProp
return res

withTextPropM :: WS Element -> WS a -> WS a
withTextPropM = (. flip withTextProp) . (>>=)

getParaProps :: Bool -> WS [Element]
getParaProps displayMathPara = do
props <- gets stParaProperties
Expand Down Expand Up @@ -861,6 +878,9 @@ withParaProp d p = do
popParaProp
return res

withParaPropM :: WS Element -> WS a -> WS a
withParaPropM = (. flip withParaProp) . (>>=)

formattedString :: String -> WS [Element]
formattedString str = do
props <- getTextProps
Expand Down Expand Up @@ -943,25 +963,27 @@ inlineToOpenXML opts (Math mathType str) = do
Right r -> return [r]
Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str)
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML opts (Code attrs str) =
withTextProp (rStyle "VerbatimChar")
$ if writerHighlight opts
then case highlight formatOpenXML attrs str of
Nothing -> unhighlighted
Just h -> return h
else unhighlighted
where unhighlighted = intercalate [br] `fmap`
(mapM formattedString $ lines str)
formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
toHlTok (toktype,tok) = mknode "w:r" []
[ mknode "w:rPr" []
[ rStyle $ show toktype ]
, mknode "w:t" [("xml:space","preserve")] tok ]
inlineToOpenXML opts (Code attrs str) = do
rSM <- gets stCharStyles
let unhighlighted = intercalate [br] `fmap`
(mapM formattedString $ lines str)
formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
toHlTok (toktype,tok) = mknode "w:r" []
[ mknode "w:rPr" []
[ rStyle (show toktype) rSM ]
, mknode "w:t" [("xml:space","preserve")] tok ]
withTextProp (rStyle "Verbatim Char" rSM)
$ if writerHighlight opts
then case highlight formatOpenXML attrs str of
Nothing -> unhighlighted
Just h -> return h
else unhighlighted
inlineToOpenXML opts (Note bs) = do
notes <- gets stFootnotes
notenum <- getUniqueId
rSM <- gets stCharStyles
let notemarker = mknode "w:r" []
[ mknode "w:rPr" [] (rStyle "FootnoteRef")
[ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM)
, mknode "w:footnoteRef" [] () ]
let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
Expand All @@ -971,22 +993,22 @@ inlineToOpenXML opts (Note bs) = do
oldParaProperties <- gets stParaProperties
oldTextProperties <- gets stTextProperties
modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] }
contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts
contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts
$ insertNoteRef bs
modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties,
stTextProperties = oldTextProperties }
let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents
modify $ \s -> s{ stFootnotes = newnote : notes }
return [ mknode "w:r" []
[ mknode "w:rPr" [] (rStyle "FootnoteRef")
[ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM)
, mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
-- internal link:
inlineToOpenXML opts (Link txt ('#':xs,_)) = do
contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt
return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
-- external link:
inlineToOpenXML opts (Link txt (src,_)) = do
contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt
extlinks <- gets stExternalLinks
id' <- case M.lookup src extlinks of
Just i -> return i
Expand Down Expand Up @@ -1088,7 +1110,7 @@ defaultFootnotes = [ mknode "w:footnote"
[ mknode "w:p" [] $
[ mknode "w:r" [] $
[ mknode "w:continuationSeparator" [] ()]]]]

parseXml :: Archive -> Archive -> String -> IO Element
parseXml refArchive distArchive relpath =
case ((findEntryByPath relpath refArchive `mplus`
Expand Down

0 comments on commit 5cdd117

Please sign in to comment.