Skip to content

Commit

Permalink
Use MD5 checksums for recompilation checking (fixes #1372, #1959)
Browse files Browse the repository at this point in the history
This is a much more robust way to do recompilation checking.  The idea
is to create a fingerprint of the ABI of an interface, and track
dependencies by recording the fingerprints of ABIs that a module
depends on.  If any of those ABIs have changed, then we need to
recompile.

In bug #1372 we weren't recording dependencies on package modules,
this patch fixes that by recording fingerprints of package modules
that we depend on.  Within a package there is still fine-grained
recompilation avoidance as before.

We currently use MD5 for fingerprints, being a good compromise between
efficiency and security.  We're not worried about attackers, but we
are worried about accidental collisions.

All the MD5 sums do make interface files a bit bigger, but compile
times on the whole are about the same as before.  Recompilation
avoidance should be a bit more accurate than in 6.8.2 due to fixing
#1959, especially when using -O.
  • Loading branch information
simonmar committed May 28, 2008
1 parent 842e9d6 commit 526c3af
Show file tree
Hide file tree
Showing 25 changed files with 1,472 additions and 1,051 deletions.
2 changes: 1 addition & 1 deletion compiler/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ SRC_MKDEPENDC_OPTS += -I$(GHC_INCLUDE_DIR)

SRC_HC_OPTS += \
-cpp -fglasgow-exts -fno-generics -Rghc-timing \
-I. -Iparser
-I. -Iparser -Iutil

# Omitted: -I$(GHC_INCLUDE_DIR)
# We should have -I$(GHC_INCLUDE_DIR) in SRC_HC_OPTS,
Expand Down
22 changes: 19 additions & 3 deletions compiler/basicTypes/Module.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ module Module
modulePackageId, moduleName,
pprModule,
mkModule,
stableModuleCmp,
-- * The ModuleLocation type
ModLocation(..),
Expand Down Expand Up @@ -71,6 +72,7 @@ import FiniteMap
import LazyUniqFM
import FastString
import Binary
import Util
import System.FilePath
\end{code}
Expand Down Expand Up @@ -182,6 +184,7 @@ mkModuleNameFS s = ModuleName s
moduleNameSlashes :: ModuleName -> String
moduleNameSlashes = dots_to_slashes . moduleNameString
where dots_to_slashes = map (\c -> if c == '.' then pathSeparator else c)
\end{code}

%************************************************************************
Expand All @@ -205,8 +208,13 @@ instance Binary Module where
put_ bh (Module p n) = put_ bh p >> put_ bh n
get bh = do p <- get bh; n <- get bh; return (Module p n)
instance Uniquable PackageId where
getUnique pid = getUnique (packageIdFS pid)
-- This gives a stable ordering, as opposed to the Ord instance which
-- gives an ordering based on the Uniques of the components, which may
-- not be stable from run to run of the compiler.
stableModuleCmp :: Module -> Module -> Ordering
stableModuleCmp (Module p1 n1) (Module p2 n2)
= (packageIdFS p1 `compare` packageIdFS p2) `thenCmp`
(moduleNameFS n1 `compare` moduleNameFS n2)
mkModule :: PackageId -> ModuleName -> Module
mkModule = Module
Expand Down Expand Up @@ -235,9 +243,17 @@ pprPackagePrefix p mod = getPprStyle doc
%************************************************************************

\begin{code}
newtype PackageId = PId FastString deriving( Eq, Ord ) -- includes the version
newtype PackageId = PId FastString deriving( Eq ) -- includes the version
-- here to avoid module loops with PackageConfig
instance Uniquable PackageId where
getUnique pid = getUnique (packageIdFS pid)
-- Note: *not* a stable lexicographic ordering, a faster unique-based
-- ordering.
instance Ord PackageId where
nm1 `compare` nm2 = getUnique nm1 `compare` getUnique nm2
instance Outputable PackageId where
ppr pid = text (packageIdString pid)
Expand Down
20 changes: 3 additions & 17 deletions compiler/basicTypes/Name.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,13 @@ import {-# SOURCE #-} TypeRep( TyThing )
import OccName
import Module
import SrcLoc
import UniqFM
import Unique
import Maybes
import Binary
import FastMutInt
import FastTypes
import FastString
import Outputable
import Data.IORef
import Data.Array
\end{code}

Expand Down Expand Up @@ -309,20 +306,9 @@ instance NamedThing Name where

\begin{code}
instance Binary Name where
put_ bh name = do
case getUserData bh of {
UserData { ud_symtab_map = symtab_map_ref,
ud_symtab_next = symtab_next } -> do
symtab_map <- readIORef symtab_map_ref
case lookupUFM symtab_map name of
Just (off,_) -> put_ bh off
Nothing -> do
off <- readFastMutInt symtab_next
writeFastMutInt symtab_next (off+1)
writeIORef symtab_map_ref
$! addToUFM symtab_map name (off,name)
put_ bh off
}
put_ bh name =
case getUserData bh of
UserData{ ud_put_name = put_name } -> put_name bh name
get bh = do
i <- get bh
Expand Down
1 change: 1 addition & 0 deletions compiler/basicTypes/OccName.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

\begin{code}
module OccName (
mk_deriv,
-- * The NameSpace type; abstact
NameSpace, tcName, clsName, tcClsName, dataName, varName,
tvName, srcDataName,
Expand Down
11 changes: 11 additions & 0 deletions compiler/coreSyn/CoreLint.lhs
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,17 @@ lintCoreExpr e@(Case scrut var alt_ty alts) =
do { scrut_ty <- lintCoreExpr scrut
; alt_ty <- lintTy alt_ty
; var_ty <- lintTy (idType var)
; let mb_tc_app = splitTyConApp_maybe (idType var)
; case mb_tc_app of
Just (tycon, _)
| debugIsOn &&
isAlgTyCon tycon &&
null (tyConDataCons tycon) ->
pprTrace "case binder's type has no constructors" (ppr e)
$ return ()
_otherwise -> return ()
-- Don't use lintIdBndr on var, because unboxed tuple is legitimate
; subst <- getTvSubst
Expand Down
3 changes: 2 additions & 1 deletion compiler/ghci/InteractiveUI.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1635,7 +1635,8 @@ showPackages = do
pkg_ids <- fmap (preloadPackages . pkgState) getDynFlags
io $ putStrLn $ showSDoc $ vcat $
text "packages currently loaded:"
: map (nest 2 . text . packageIdString) (sort pkg_ids)
: map (nest 2 . text . packageIdString)
(sortBy (compare `on` packageIdFS) pkg_ids)
where showFlag (ExposePackage p) = text $ " -package " ++ p
showFlag (HidePackage p) = text $ " -hide-package " ++ p
showFlag (IgnorePackage p) = text $ " -ignore-package " ++ p
Expand Down
132 changes: 102 additions & 30 deletions compiler/iface/BinIface.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ import SrcLoc
import ErrUtils
import Config
import FastMutInt
import Unique
import Outputable
import FastString

import Data.List
import Data.Word
Expand Down Expand Up @@ -149,7 +151,19 @@ writeBinIface dflags hi_path mod_iface = do
put_ bh symtab_p_p

-- Make some intial state
ud <- newWriteState
symtab_next <- newFastMutInt
writeFastMutInt symtab_next 0
symtab_map <- newIORef emptyUFM
let bin_symtab = BinSymbolTable {
bin_symtab_next = symtab_next,
bin_symtab_map = symtab_map }
dict_next_ref <- newFastMutInt
writeFastMutInt dict_next_ref 0
dict_map_ref <- newIORef emptyUFM
let bin_dict = BinDictionary {
bin_dict_next = dict_next_ref,
bin_dict_map = dict_map_ref }
ud <- newWriteState (putName bin_symtab) (putFastString bin_dict)

-- Put the main thing,
bh <- return $ setUserData bh ud
Expand All @@ -161,8 +175,8 @@ writeBinIface dflags hi_path mod_iface = do
seekBin bh symtab_p -- Seek back to the end of the file

-- Write the symbol table itself
symtab_next <- readFastMutInt (ud_symtab_next ud)
symtab_map <- readIORef (ud_symtab_map ud)
symtab_next <- readFastMutInt symtab_next
symtab_map <- readIORef symtab_map
putSymbolTable bh symtab_next symtab_map
debugTraceMsg dflags 3 (text "writeBinIface:" <+> int symtab_next
<+> text "Names")
Expand All @@ -176,8 +190,8 @@ writeBinIface dflags hi_path mod_iface = do
seekBin bh dict_p -- Seek back to the end of the file

-- Write the dictionary itself
dict_next <- readFastMutInt (ud_dict_next ud)
dict_map <- readIORef (ud_dict_map ud)
dict_next <- readFastMutInt dict_next_ref
dict_map <- readIORef dict_map_ref
putDictionary bh dict_next dict_map
debugTraceMsg dflags 3 (text "writeBinIface:" <+> int dict_next
<+> text "dict entries")
Expand Down Expand Up @@ -248,6 +262,51 @@ serialiseName bh name _ = do
let mod = nameModule name
put_ bh (modulePackageId mod, moduleName mod, nameOccName name)


putName :: BinSymbolTable -> BinHandle -> Name -> IO ()
putName BinSymbolTable{
bin_symtab_map = symtab_map_ref,
bin_symtab_next = symtab_next } bh name
= do
symtab_map <- readIORef symtab_map_ref
case lookupUFM symtab_map name of
Just (off,_) -> put_ bh off
Nothing -> do
off <- readFastMutInt symtab_next
writeFastMutInt symtab_next (off+1)
writeIORef symtab_map_ref
$! addToUFM symtab_map name (off,name)
put_ bh off


data BinSymbolTable = BinSymbolTable {
bin_symtab_next :: !FastMutInt, -- The next index to use
bin_symtab_map :: !(IORef (UniqFM (Int,Name)))
-- indexed by Name
}


putFastString :: BinDictionary -> BinHandle -> FastString -> IO ()
putFastString BinDictionary { bin_dict_next = j_r,
bin_dict_map = out_r} bh f
= do
out <- readIORef out_r
let uniq = getUnique f
case lookupUFM out uniq of
Just (j, _) -> put_ bh j
Nothing -> do
j <- readFastMutInt j_r
put_ bh j
writeFastMutInt j_r (j + 1)
writeIORef out_r $! addToUFM out uniq (j, f)


data BinDictionary = BinDictionary {
bin_dict_next :: !FastMutInt, -- The next index to use
bin_dict_map :: !(IORef (UniqFM (Int,FastString)))
-- indexed by FastString
}

-- -----------------------------------------------------------------------------
-- All the binary instances

Expand Down Expand Up @@ -300,84 +359,88 @@ instance Binary ModIface where
put_ bh (ModIface {
mi_module = mod,
mi_boot = is_boot,
mi_mod_vers = mod_vers,
mi_iface_hash= iface_hash,
mi_mod_hash = mod_hash,
mi_orphan = orphan,
mi_finsts = hasFamInsts,
mi_deps = deps,
mi_usages = usages,
mi_exports = exports,
mi_exp_vers = exp_vers,
mi_exp_hash = exp_hash,
mi_fixities = fixities,
mi_deprecs = deprecs,
mi_decls = decls,
mi_insts = insts,
mi_fam_insts = fam_insts,
mi_rules = rules,
mi_rule_vers = rule_vers,
mi_orphan_hash = orphan_hash,
mi_vect_info = vect_info,
mi_hpc = hpc_info }) = do
put_ bh mod
put_ bh is_boot
put_ bh mod_vers
put_ bh iface_hash
put_ bh mod_hash
put_ bh orphan
put_ bh hasFamInsts
lazyPut bh deps
lazyPut bh usages
put_ bh exports
put_ bh exp_vers
put_ bh exp_hash
put_ bh fixities
lazyPut bh deprecs
put_ bh decls
put_ bh insts
put_ bh fam_insts
lazyPut bh rules
put_ bh rule_vers
put_ bh orphan_hash
put_ bh vect_info
put_ bh hpc_info

get bh = do
mod_name <- get bh
is_boot <- get bh
mod_vers <- get bh
iface_hash <- get bh
mod_hash <- get bh
orphan <- get bh
hasFamInsts <- get bh
deps <- lazyGet bh
usages <- {-# SCC "bin_usages" #-} lazyGet bh
exports <- {-# SCC "bin_exports" #-} get bh
exp_vers <- get bh
exp_hash <- get bh
fixities <- {-# SCC "bin_fixities" #-} get bh
deprecs <- {-# SCC "bin_deprecs" #-} lazyGet bh
decls <- {-# SCC "bin_tycldecls" #-} get bh
insts <- {-# SCC "bin_insts" #-} get bh
fam_insts <- {-# SCC "bin_fam_insts" #-} get bh
rules <- {-# SCC "bin_rules" #-} lazyGet bh
rule_vers <- get bh
orphan_hash <- get bh
vect_info <- get bh
hpc_info <- get bh
return (ModIface {
mi_module = mod_name,
mi_boot = is_boot,
mi_mod_vers = mod_vers,
mi_iface_hash = iface_hash,
mi_mod_hash = mod_hash,
mi_orphan = orphan,
mi_finsts = hasFamInsts,
mi_deps = deps,
mi_usages = usages,
mi_exports = exports,
mi_exp_vers = exp_vers,
mi_exp_hash = exp_hash,
mi_fixities = fixities,
mi_deprecs = deprecs,
mi_decls = decls,
mi_globals = Nothing,
mi_insts = insts,
mi_fam_insts = fam_insts,
mi_rules = rules,
mi_rule_vers = rule_vers,
mi_orphan_hash = orphan_hash,
mi_vect_info = vect_info,
mi_hpc = hpc_info,
-- And build the cached values
mi_dep_fn = mkIfaceDepCache deprecs,
mi_fix_fn = mkIfaceFixCache fixities,
mi_ver_fn = mkIfaceVerCache decls })
mi_hash_fn = mkIfaceHashCache decls })

getWayDescr :: IO String
getWayDescr = do
Expand Down Expand Up @@ -421,22 +484,31 @@ instance (Binary name) => Binary (GenAvailInfo name) where
return (AvailTC ab ac)

instance Binary Usage where
put_ bh usg = do
put_ bh (usg_name usg)
put_ bh (usg_mod usg)
put_ bh usg@UsagePackageModule{} = do
putByte bh 0
put_ bh (usg_mod usg)
put_ bh (usg_mod_hash usg)
put_ bh usg@UsageHomeModule{} = do
putByte bh 1
put_ bh (usg_mod_name usg)
put_ bh (usg_mod_hash usg)
put_ bh (usg_exports usg)
put_ bh (usg_entities usg)
put_ bh (usg_rules usg)

get bh = do
nm <- get bh
mod <- get bh
exps <- get bh
ents <- get bh
rules <- get bh
return (Usage { usg_name = nm, usg_mod = mod,
usg_exports = exps, usg_entities = ents,
usg_rules = rules })
h <- getByte bh
case h of
0 -> do
nm <- get bh
mod <- get bh
return UsagePackageModule { usg_mod = nm, usg_mod_hash = mod }
_ -> do
nm <- get bh
mod <- get bh
exps <- get bh
ents <- get bh
return UsageHomeModule { usg_mod_name = nm, usg_mod_hash = mod,
usg_exports = exps, usg_entities = ents }

instance Binary Deprecations where
put_ bh NoDeprecs = putByte bh 0
Expand Down
Loading

0 comments on commit 526c3af

Please sign in to comment.