Skip to content
Browse files

Allow some out-of-line primops to have inline versions

An out-of-line primop can provide an inline version e.g. in cases
where we have extra static information that would allow us to emit a
more efficient inline version.
  • Loading branch information...
1 parent 0c2cb6f commit 8f544f826897e28350fca157f99a31468e4d0322 @tibbe committed Mar 7, 2014
Showing with 58 additions and 32 deletions.
  1. +58 −32 compiler/codeGen/StgCmmPrim.hs
View
90 compiler/codeGen/StgCmmPrim.hs
@@ -86,36 +86,63 @@ cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
-- That won't work.
tycon = tyConAppTyCon res_ty
-cgOpApp (StgPrimOp primop) args res_ty
- | primOpOutOfLine primop
- = do { cmm_args <- getNonVoidArgAmodes args
- ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
- ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
-
- | ReturnsPrim VoidRep <- result_info
- = do cgPrimOp [] primop args
- emitReturn []
-
- | ReturnsPrim rep <- result_info
- = do dflags <- getDynFlags
- res <- newTemp (primRepCmmType dflags rep)
- cgPrimOp [res] primop args
- emitReturn [CmmReg (CmmLocal res)]
-
- | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
- = do (regs, _hints) <- newUnboxedTupleRegs res_ty
- cgPrimOp regs primop args
- emitReturn (map (CmmReg . CmmLocal) regs)
-
- | otherwise = panic "cgPrimop"
- where
- result_info = getPrimOpResultInfo primop
+cgOpApp (StgPrimOp primop) args res_ty = do
+ dflags <- getDynFlags
+ cmm_args <- getNonVoidArgAmodes args
+ case shouldInlinePrimOp dflags primop cmm_args of
+ Nothing -> do let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
+ emitCall (NativeNodeCall, NativeReturn) fun cmm_args
+
+ Just f
+ | ReturnsPrim VoidRep <- result_info
+ -> do f []
+ emitReturn []
+
+ | ReturnsPrim rep <- result_info
+ -> do dflags <- getDynFlags
+ res <- newTemp (primRepCmmType dflags rep)
+ f [res]
+ emitReturn [CmmReg (CmmLocal res)]
+
+ | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
+ -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
+ f regs
+ emitReturn (map (CmmReg . CmmLocal) regs)
+
+ | otherwise -> panic "cgPrimop"
+ where
+ result_info = getPrimOpResultInfo primop
cgOpApp (StgPrimCallOp primcall) args _res_ty
= do { cmm_args <- getNonVoidArgAmodes args
; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
+-- | Decide whether an out-of-line primop should be replaced by an
+-- inline implementation. This might happen e.g. if there's enough
+-- static information, such as statically know arguments, to emit a
+-- more efficient implementation inline.
+--
+-- Returns 'Nothing' if this primop should use its out-of-line
+-- implementation (defined elsewhere) and 'Just' together with a code
+-- generating function that takes the output regs as arguments
+-- otherwise.
+shouldInlinePrimOp :: DynFlags
+ -> PrimOp -- ^ The primop
+ -> [CmmExpr] -- ^ The primop arguments
+ -> Maybe ([LocalReg] -> FCode ())
+shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init]
+ | n <= inlineAllocLimit dflags = Just $ \ [res] -> doNewArrayOp res n init
+shouldInlinePrimOp dflags primop args
+ | primOpOutOfLine primop = Nothing
+ | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
+
+-- TODO: Several primops, such as 'copyArray#', only have an inline
+-- implementation (below) but could possibly have both an inline
+-- implementation and an out-of-line implementation, just like
+-- 'newArray#'. This would lower the amount of code generated,
+-- hopefully without a performance impact (needs to be measured).
+
---------------------------------------------------
cgPrimOp :: [LocalReg] -- where to put the results
-> PrimOp -- the op
@@ -240,11 +267,6 @@ emitPrimOp dflags [] WriteMutVarOp [mutv,var]
(CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
[(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
--- Inline if size is small and statically know, otherwise fall through
--- to general (i.e. out-of-line) case.
-emitPrimOp dflags [res] NewArrayOp [(CmmLit (CmmInt n _))]
- | n <= inlineAllocLimit dflags = doNewArrayOp dflags res n
-
-- #define sizzeofByteArrayzh(r,a) \
-- r = ((StgArrWords *)(a))->bytes
emitPrimOp dflags [res] SizeofByteArrayOp [arg]
@@ -1503,8 +1525,12 @@ doSetByteArrayOp ba off len c
-- ----------------------------------------------------------------------------
-- Allocating arrays
-doNewArrayOp :: DynFlags -> CmmFormal -> Integer -> FCode ()
-doNewArrayOp dflags res_r n = do
+-- Takes a register to return the newly allocated array in, the size
+-- of the new array, and an initial value for the elements. Allocates
+-- a new 'MutableArray#'.
+doNewArrayOp :: CmmFormal -> Integer -> CmmExpr -> FCode ()
+doNewArrayOp res_r n init = do
+ dflags <- getDynFlags
tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
(cmmMulWord dflags (mkIntExpr dflags (fromInteger n)) (wordSize dflags))
(zeroExpr dflags)
@@ -1513,7 +1539,7 @@ doNewArrayOp dflags res_r n = do
-- the existing heap check to allocate inline.
virt_hp <- getVirtHp
- let card_bytes = cardRoundUp dflags (fromIntegral n)
+ let card_bytes = cardRoundUp dflags (fromInteger n)
size = fromIntegral n + bytesToWordsRoundUp dflags card_bytes
words = arrPtrsHdrSizeWords dflags + size

0 comments on commit 8f544f8

Please sign in to comment.
Something went wrong with that request. Please try again.