Skip to content

Commit

Permalink
Generate .debug.ghc section
Browse files Browse the repository at this point in the history
The conversion to DWARF is always lossy, so we put all the extra
bits of information into an extra object file section (.debug-ghc).

Notes:

* We use the eventlog format. This might seem like a slightly arbitrary
  choice, but makes it easy to copy debug data into eventlogs later in
  order to do profiling. In the meantime, it's well-defined and extensible,
  so until we run out of record IDs there's no strong reason against it
  either.

* Core notes now cause the complete Core to be copied. We are reasonably
  smart about this: We never emit a piece of Core twice, and use a compact
  binary representation for most Core constructors.

  On the other hand, we just pretty-print types as well as names and emit
  them as strings. This can sometimes lead to packets becoming too large
  for the eventlog format to handle (we had types break the 20k loc mark).
  In order to not run into these kinds of problems, we just omit packets
  that are longer than a certain threshold.

* The amount of data generated here is significant. We therefore use faily
  low-level generation code using memory buffers. Furthermore, we include
  the data as a string, escaped using another well-optimized low-level
  routine. All this might make it hard to read debug data in the assembly,
  but is absolutely required for debugging not to become a significant
  resource hog.

* The eventlog IDs used here were chosen primarily to avoid collisions.
  If this code gets merged they should be adjusted appropriately.
  • Loading branch information
scpmw committed Feb 14, 2014
1 parent 2f2f0ea commit bbf6f35
Show file tree
Hide file tree
Showing 6 changed files with 288 additions and 6 deletions.
221 changes: 217 additions & 4 deletions compiler/cmm/Debug.hs
Expand Up @@ -14,29 +14,43 @@ module Debug (
UnwindTable, UnwindExpr(..),
cmmDebugGen,
cmmDebugLink,
debugToMap
debugToMap,
writeDebugToEventlog

) where

import Binary
import BlockId ( blockLbl )
import CLabel
import Cmm
import CmmUtils
import CoreSyn
import FastString ( nilFS, mkFastString )
import DynFlags
import FastString ( nilFS, mkFastString, unpackFS )
import Module
import Outputable
import PprCore ()
import PprCmmExpr ( pprExpr )
import SrcLoc
import UniqFM
import Util
import Var ( Var, varType )

import Compiler.Hoopl

import Control.Monad ( foldM, forM, forM_, void )

import Data.Char ( ord)
import Data.Maybe
import Data.List ( find, minimumBy )
import Data.Ord ( comparing )
import Data.List ( find, minimumBy )
import Data.Ord ( comparing )
import qualified Data.Map as Map
import Data.Word ( Word8, Word16 )

import Foreign.ForeignPtr

#define EVENTLOG_CONSTANTS_ONLY
#include "../../includes/rts/EventLogFormat.h"

-- | Debug information about a block of code. Context is encoded through nesting.
data DebugBlock =
Expand Down Expand Up @@ -265,3 +279,202 @@ toUnwindExpr e@(CmmMachOp op [e1, e2]) =
toUnwindExpr e
= pprPanic "Unsupported unwind expression!" (ppr e)

-- | Generates debug data into a buffer
writeDebugToEventlog :: DynFlags -> ModLocation -> [DebugBlock] -> IO (Int, ForeignPtr Word8)
writeDebugToEventlog dflags mod_loc blocks = do

-- Write data into a binary memory handle
bh <- openBinMem $ 1024 * 1024
putEvent bh EVENT_DEBUG_MODULE $ do
putString bh $ packageIdString (thisPackage dflags)
putString bh $ fromMaybe "???" $ ml_hs_file mod_loc
void $ foldM (putBlock bh dflags maxBound) (0, emptyUFM) blocks
getBinMemBuf bh

-- | Packs the given static value into a (variable-length) event-log
-- packet.
putEvent :: BinHandle -> Word8 -> IO a -> IO a
putEvent bh id cts = catchSize bh 0x10000 wrap return
where wrap = do
put_ bh id
-- Put placeholder for size
sizePos <- put bh (0 :: Word16)
-- Put contents
a <- cts
-- Put final size
endPos <- tellBin bh
putAt bh sizePos $ fromIntegral $ (endPos `diffBin` sizePos) - 2
-- Seek back
seekBin bh endPos
return a

-- | Puts an alternate version if the first one is bigger than the
-- given limit.
--
-- This is a pretty crude way of handling oversized
-- packets... Can't think of a better way right now though.
catchSize :: BinHandle -> Int -> IO a -> (a -> IO a) -> IO a
catchSize bh limit cts1 cts2 = do

-- Put contents, note how much size it uses
start <- tellBin bh :: IO (Bin ())
a <- cts1
end <- tellBin bh

-- Seek back and put second version if size is over limit
if (end `diffBin` start) >= limit
then do seekBin bh start
cts2 a
else return a

type BlockId = Word16

putBlock :: BinHandle -> DynFlags -> BlockId -> (BlockId, CoreMap) -> DebugBlock
-> IO (BlockId, CoreMap)
putBlock bh dflags pid (bid, coreDone) block = do
-- Put sub-blocks
(bid', coreDoneSub) <- foldM (putBlock bh dflags bid) (bid+1, emptyUFM) (dblBlocks block)
-- Write our own data
putEvent bh EVENT_DEBUG_BLOCK $ do
put_ bh bid
put_ bh pid
let showSDocC = flip (renderWithStyle dflags) (mkCodeStyle CStyle)
putString bh $ showSDocC $ ppr (dblCLabel block)
-- Write annotations.
coreDoneBlock <- foldM (putAnnotEvent bh dflags coreDoneSub) emptyUFM (dblTicks block)
return (bid', coreDone `unionCoreMap` coreDoneSub `unionCoreMap` coreDoneBlock)

putAnnotEvent :: BinHandle -> DynFlags -> CoreMap -> CoreMap -> RawTickish -> IO CoreMap
putAnnotEvent bh _ _ coreDone (SourceNote ss names) = do
putEvent bh EVENT_DEBUG_SOURCE $ do
put_ bh $ encLoc $ srcSpanStartLine ss
put_ bh $ encLoc $ srcSpanStartCol ss
put_ bh $ encLoc $ srcSpanEndLine ss
put_ bh $ encLoc $ srcSpanEndCol ss
putString bh $ unpackFS $ srcSpanFile ss
putString bh names
return coreDone
where encLoc x = fromIntegral x :: Word16

putAnnotEvent bh dflags coreDoneSub coreDone (CoreNote lbl corePtr)
-- This piece of core was already covered earlier in this block?
| not $ (lbl, exprPtrCons corePtr) `elemCoreMap` coreDone
= putEvent bh EVENT_DEBUG_CORE $ do
putString bh $ showSDocDump dflags $ ppr lbl
-- Emit core, leaving out (= referencing) any core pieces
-- that were emitted from sub-blocks
doneNew <- case corePtr of
ExprPtr core -> putCoreExpr bh dflags coreDoneSub core
AltPtr alt -> putCoreAlt bh dflags coreDoneSub alt
return $ addToCoreMap lbl (exprPtrCons corePtr) (coreDone `unionCoreMap` doneNew)

putAnnotEvent _ _ _ coreDone _ = return coreDone

-- | Constants for core binary representation
coreMisc, coreApp, coreRef, coreLam, coreLet, coreCase, coreAlt :: Word8
coreMisc = 0
coreApp = 1
coreRef = 2
coreLam = 3
coreLet = 4
coreCase = 5
coreAlt = 6

putCoreExpr :: BinHandle -> DynFlags -> CoreMap -> CoreExpr -> IO CoreMap
putCoreExpr bh dflags bs (App e1 e2) = do
put_ bh coreApp
d1 <- putCoreExpr bh dflags bs e1
d2 <- putCoreExpr bh dflags bs e2
return $ d1 `unionCoreMap` d2
putCoreExpr bh dflags bs (Lam b e) = do
put_ bh coreLam
putString bh $ showSDoc dflags $ ppr b
putString bh $ showSDoc dflags $ ppr $ varType b
putCoreExpr bh dflags bs e
putCoreExpr bh dflags bs (Let es e) = do
put_ bh coreLet
d1 <- putCoreLet bh dflags bs es
d2 <- putCoreExpr bh dflags bs e
return $ d1 `unionCoreMap` d2
putCoreExpr bh dflags bs (Case expr bind _ alts) = do
put_ bh coreCase
d <- putCoreExpr bh dflags bs expr
putString bh $ showSDoc dflags $ ppr bind
putString bh $ showSDoc dflags $ ppr $ varType bind
put_ bh (fromIntegral (length alts) :: Word16)
fmap (foldr unionCoreMap d) $
forM alts $ \alt@(a, _, _) ->
checkCoreRef bh dflags bs (bind, a) $
putCoreAlt bh dflags bs alt
putCoreExpr bh dflags bs (Cast e _) = putCoreExpr bh dflags bs e
putCoreExpr bh dflags bs (Tick _ e) = putCoreExpr bh dflags bs e
-- All other elements are supposed to have a simple "pretty printed"
-- representation that we can simply output verbatim.
putCoreExpr bh dflags _ other = do
put_ bh coreMisc
putString bh $ showSDoc dflags $ ppr other
return emptyUFM

putCoreAlt :: BinHandle -> DynFlags -> CoreMap -> CoreAlt -> IO CoreMap
putCoreAlt bh dflags bs (a,binds,e) = do
put_ bh coreAlt
putString bh $ case a of
DEFAULT -> ""
_ -> showSDoc dflags $ ppr a
put_ bh (fromIntegral (length binds) :: Word16)
forM_ binds $ \b -> do
putString bh . showSDoc dflags . ppr $ b
putString bh . showSDoc dflags . ppr . varType $ b
putCoreExpr bh dflags bs e

putCoreLet :: BinHandle -> DynFlags -> CoreMap -> CoreBind -> IO CoreMap
putCoreLet bh dflags bs (NonRec b e) = do
put_ bh (1 :: Word16) -- could use 0 to mark non-recursive case?
putString bh $ showSDoc dflags $ ppr b
putString bh $ showSDoc dflags $ ppr $ varType b
checkCoreRef bh dflags bs (b, DEFAULT) $
putCoreExpr bh dflags bs e
putCoreLet bh dflags bs (Rec ps) = do
put_ bh (fromIntegral (length ps) :: Word16)
fmap (foldr unionCoreMap emptyUFM) $
forM ps $ \(b, e) -> do
putString bh $ showSDoc dflags $ ppr b
putString bh $ showSDoc dflags $ ppr $ varType b
checkCoreRef bh dflags bs (b, DEFAULT) $
putCoreExpr bh dflags bs e

-- | Generate reference to core piece that was output elsewhere... Or
-- proceed with given code otherwise.
checkCoreRef :: BinHandle -> DynFlags -> CoreMap -> (Var, AltCon) -> IO CoreMap -> IO CoreMap
checkCoreRef bh dflags bs (b,a) code
| (b,a) `elemCoreMap` bs = do
put_ bh coreRef
putString bh $ showSDocDump dflags $ ppr b
putString bh $ case a of
DEFAULT -> ""
_ -> showSDoc dflags $ ppr a
return emptyUFM
| otherwise = fmap (addToCoreMap b a) code

-- | Put a C-style string (null-terminated). We assume that the string
-- is ASCII.
--
-- This could well be subject to change in future...
putString :: BinHandle -> String -> IO ()
putString bh str = do
mapM_ (putByte bh . fromIntegral . ord) str
putByte bh 0

-- | Holds identities of core pieces we have decided to output
type CoreMap = UniqFM [AltCon]

elemCoreMap :: (Var, AltCon) -> CoreMap -> Bool
elemCoreMap (bind, con) m = case lookupUFM m bind of
Just cs -> con `elem` cs
Nothing -> False

addToCoreMap :: Var -> AltCon -> CoreMap -> CoreMap
addToCoreMap b a d = addToUFM_C (\o _ -> a:o) d b [a]

unionCoreMap :: CoreMap -> CoreMap -> CoreMap
unionCoreMap = plusUFM_C (++)
7 changes: 6 additions & 1 deletion compiler/nativeGen/Dwarf.hs
Expand Up @@ -86,7 +86,12 @@ dwarfGen df modLoc us blocks = do
debugFrameHeader framesU $$
debugFrames framesU procs

return (infoSct $$ abbrevSct $$ lineSct $$ frameSct, us'')
-- .debug_ghc section: debug data in eventlog format (GHC-specific, obviously)

This comment has been minimized.

Copy link
@tibbe

tibbe Feb 28, 2014

Since this section is large and non-standard, perhaps we should add a flag so it can be emitted if users only care for the standard debug info e.g. because they want (some kind of) stack traces. I'd be worried in growing binaries a lot just to get stack traces.

Have a look at http://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html for an idea of what such flags could look like. Interesting options include

  • -gghc, which would include the .ghc_debug section (equivalent to GCC's -ggdb).
  • -gsplit-dwarf, which would allow you to have a binary without debug info running on some server and when it crashes, use the core dump together with the .dwo file generated during compilation and stashed away somewhere to get a stack trace with line numbers.
  • -gvms<level>, to control the amount of information.
  • -pg, in case you have some data used for profiling only.

This comment has been minimized.

Copy link
@Tarrasch

Tarrasch Feb 28, 2014

The -gsplit-dwarf flag seems very interesting. But it would be good to have an idea how much the binary size increases with -g in the first place. Splitting is probably not necessary if the increase is with 5%, but it could be very useful to split if the binary increases with 500% in size.

This comment has been minimized.

Copy link
@scpmw

scpmw Feb 28, 2014

Author Owner

Good point. On the other hand, we can probably assume that anybody using -g is probably willing to tolerate a certain amount of binary growth. And people that are really interested in minimizing file size can use strip to throw away anything that they're not interested in. According to a quick search, -gsplit-dwarf looks like it just automates a similar process using objcopy.

That being said, I agree that the .debug_ghc section is actually too large right now (27B of 41MB total for libHSbase.so). It should be possible to make it more compact (say, files/string tables), but if it still turns out to be at a problematic size we can think of making generating it optional-off or even default-off with -g.

This comment has been minimized.

Copy link
@tibbe

tibbe Feb 28, 2014

At Google we do what I said above, split the DWARF information into a separate file and only copy the normal binary to the N machines that will run that binary. I think being able to omit the .debug_ghc section is more important than being able to omit the rest. I don't know what the .debug_ghc section is used for yet so I cannot say whether it should be on or off by default.

This comment has been minimized.

Copy link
@simonmar

simonmar Feb 28, 2014

I'm probably missing something, but isn't the debug info only generated when you use -g?

This comment has been minimized.

Copy link
@tibbe

tibbe Feb 28, 2014

@simonmar I'd like an option to only generate the standard DWARF debug sections (e.g. line numbers etc), without generating the much larger .debug_ghc section. Then I could leave the debug information in all my binaries, even those I push to production, and get the benefit of having stack traces and using various standard profiling tools to analyse a binary running e.g. on a production server somewhere in the cloud.

If the only option is including all debug info, it might be too costly to leave on by default.

This comment has been minimized.

Copy link
@simonmar

simonmar Mar 2, 2014

make sense now. Thanks!

This comment has been minimized.

Copy link
@Tarrasch

Tarrasch Mar 4, 2014

Hello guys. I looked at the section sizes for the ghc binary. And I include a pie chart (with most smaller sections removed). Hopefully this will give a better idea of how much space everything takes.

section-for-ghc

I did this basically:

$ size --format=SysV /home/arash/repos/ghc/inplace/lib/bin/ghc-stage2
/home/arash/repos/ghc/inplace/lib/bin/ghc-stage2  :
section                   size      addr
.interp                     28   4194872
.note.ABI-tag               32   4194900
...
.debug_ghc              820940         0
.debug-ghc-link-info      4317         0
Total                  2764622

Then copy paste into LibreOffice Calc.


An unrelated question: What makes the .debug_ghc special so that I can strip it from my binaries in the cloud, but still use it later offline? I thought you could skip all dwarf information and just retain the symbol table for your binaries in production. Would this not work?

This comment has been minimized.

Copy link
@Tarrasch

Tarrasch Mar 4, 2014

Whoups, never mind my question. I think I figured it out...

This comment has been minimized.

Copy link
@tibbe

tibbe via email Mar 4, 2014

This comment has been minimized.

Copy link
@Tarrasch

Tarrasch Mar 4, 2014

Oh, I definitely did not intend to prove any point with the data. But it seems to me that the -gsplit-dwarf flag would make sense. I'm glad that you liked the data. :)

evData <- writeDebugToEventlog df modLoc blocks
let ghcSct = dwarfGhcSection $$
pprBuffer evData

return (infoSct $$ abbrevSct $$ lineSct $$ frameSct $$ ghcSct, us'')

-- | Header for a compilation unit, establishing global format
-- parameters
Expand Down
41 changes: 41 additions & 0 deletions compiler/nativeGen/Dwarf/Types.hs
Expand Up @@ -8,9 +8,11 @@ module Dwarf.Types
, pprWord
, pprLEBWord
, pprLEBInt
, pprBuffer
)
where

import Binary
import CLabel
import FastString
import Outputable
Expand All @@ -22,6 +24,10 @@ import Data.Bits
import Data.Word
import Data.Char

import Foreign

import qualified System.IO.Unsafe as Unsafe

-- | Individual dwarf records
data DwarfInfo
= DwarfCompileUnit { dwChildren :: [DwarfInfo]
Expand Down Expand Up @@ -170,3 +176,38 @@ pprDwarfInfoOpen (DwarfBlock _ label marker) = sdocWithDynFlags $ \df ->

pprDwarfInfoClose :: SDoc
pprDwarfInfoClose = pprAbbrev DwAbbrNull

-- | Generate code for emitting the given buffer. Will take care to
-- escape it appropriatly.
pprBuffer :: (Int, ForeignPtr Word8) -> SDoc
pprBuffer (len, buf) = Unsafe.unsafePerformIO $ do

-- As we output a string, we need to do escaping. We approximate
-- here that the escaped string will have double the size of the
-- original buffer. That should be plenty of space given the fact
-- that we expect to be converting a lot of text.
bh <- openBinMem (len * 2)
let go p q | p == q = return ()
| otherwise = peek p >>= escape . fromIntegral >> go (p `plusPtr` 1) q
escape c
| c == ord '\\' = putB '\\' >> putB '\\'
| c == ord '\"' = putB '\\' >> putB '"'
| c == ord '\n' = putB '\\' >> putB 'n'
| c == ord '?' = putB '\\' >> putB '?' -- silence trigraph warnings
| isAscii (chr c) && isPrint (chr c)
= putByte bh (fromIntegral c)
| otherwise = do putB '\\'
putB $ intToDigit (c `div` 64)
putB $ intToDigit ((c `div` 8) `mod` 8)
putB $ intToDigit (c `mod` 8)
putB :: Char -> IO ()
putB = putByte bh . fromIntegral . ord
{-# INLINE putB #-}
withForeignPtr buf $ \p ->
go p (p `plusPtr` len)

-- Pack result into a string
(elen, ebuf) <- getBinMemBuf bh
buf <- withForeignPtr ebuf $ \p -> mkFastStringForeignPtr p ebuf elen

return $ ptext (sLit "\t.ascii ") <> doubleQuotes (ftext buf)
11 changes: 11 additions & 0 deletions compiler/utils/Binary.hs
Expand Up @@ -27,9 +27,11 @@ module Binary
seekBy,
tellBin,
castBin,
diffBin,

writeBinMem,
readBinMem,
getBinMemBuf,

fingerprintBinMem,
computeFingerprint,
Expand Down Expand Up @@ -124,6 +126,9 @@ newtype Bin a = BinPtr Int
castBin :: Bin a -> Bin b
castBin (BinPtr i) = BinPtr i

diffBin :: Bin a -> Bin a -> Int
diffBin (BinPtr i) (BinPtr j) = i - j

---------------------------------------------------------------
-- class Binary
---------------------------------------------------------------
Expand Down Expand Up @@ -208,6 +213,12 @@ readBinMem filename = do
writeFastMutInt sz_r filesize
return (BinMem noUserData ix_r sz_r arr_r)

getBinMemBuf :: BinHandle -> IO (Int, ForeignPtr Word8)
getBinMemBuf (BinMem _ ix_r _ arr_r) = do
arr <- readIORef arr_r
ix <- readFastMutInt ix_r
return (ix, arr)

fingerprintBinMem :: BinHandle -> IO Fingerprint
fingerprintBinMem (BinMem _ ix_r _ arr_r) = do
arr <- readIORef arr_r
Expand Down
10 changes: 9 additions & 1 deletion includes/rts/EventLogFormat.h
Expand Up @@ -172,12 +172,20 @@

/* Range 140 - 159 is reserved for Perf events. */

/* Range 200 - 210 is hereby reserved for profiling stuff. In hopes that I this
is were I can find some peace. */

#define EVENT_DEBUG_MODULE 200
#define EVENT_DEBUG_BLOCK 201
#define EVENT_DEBUG_SOURCE 202
#define EVENT_DEBUG_CORE 203

/*
* The highest event code +1 that ghc itself emits. Note that some event
* ranges higher than this are reserved but not currently emitted by ghc.
* This must match the size of the EventDesc[] array in EventLog.c
*/
#define NUM_GHC_EVENT_TAGS 59
#define NUM_GHC_EVENT_TAGS 204

#if 0 /* DEPRECATED EVENTS: */
/* we don't actually need to record the thread, it's implicit */
Expand Down
4 changes: 4 additions & 0 deletions rts/eventlog/EventLog.c
Expand Up @@ -106,6 +106,10 @@ char *EventDesc[] = {
[EVENT_TASK_CREATE] = "Task create",
[EVENT_TASK_MIGRATE] = "Task migrate",
[EVENT_TASK_DELETE] = "Task delete",
[EVENT_DEBUG_MODULE] = "Debug Module",
[EVENT_DEBUG_BLOCK] = "Debug Block",
[EVENT_DEBUG_SOURCE] = "Debug Source",
[EVENT_DEBUG_CORE] = "Debug Core",
};

// Event type.
Expand Down

0 comments on commit bbf6f35

Please sign in to comment.