diff --git a/Git/PackIndex.hs b/Git/PackIndex.hs new file mode 100644 index 0000000..49055ee --- /dev/null +++ b/Git/PackIndex.hs @@ -0,0 +1,127 @@ +{-# LANGUAGE RecordWildCards #-} +{-# OPTIONS -Wall #-} + +module Git.PackIndex ( + dumpRawPackIndex, + + -- * Paths + idxPath +) where + +import Control.Applicative ((<$>)) +import Control.Monad (forM_) +import qualified Data.ByteString as BS +import Data.Word (Word32) +import Foreign.Ptr +import Foreign.Storable +import Data.Storable.Endian +import System.FilePath +import System.IO.MMap +import System.Posix.Types +import Text.Printf + +import Git.Path + +------------------------------------------------------------ + +data IDX = IDX1 { + idx1Size :: Int + , idx1Fanout :: Ptr (BigEndian Word32) + , idx1Offsets :: Ptr (BigEndian Word32) + } | IDX2 { + idx2Size :: Int + , idx2Fanout :: Ptr (BigEndian Word32) + , idx2SHA1s :: Ptr (BigEndian Word32) + , idx2CRCs :: Ptr (BigEndian Word32) + , idx2Offsets :: Ptr (BigEndian Word32) + , idx264bOffsets :: Ptr (BigEndian Word32) + -- , idx2PackCSum :: Ptr (BigEndian Word32) + -- , idx2IdxCSum :: Ptr (BigEndian Word32) + } + +------------------------------------------------------------ +-- | Public API + +-- | Number of objects in the corresponding .pack file +idxSize :: IDX -> Int +idxSize IDX1{..} = idx1Size +idxSize IDX2{..} = idx2Size + +-- | Nth SHA1 +idxSha1 :: IDX -> Int -> IO (Maybe BS.ByteString) +idxSha1 IDX1{..} n + | n >= idx1Size = return Nothing + | otherwise = do + cs <- peekByteOff idx1Offsets (4 + (n * 24)) + Just <$> BS.packCStringLen (cs, 20) +idxSha1 IDX2{..} n + | n >= idx2Size = return Nothing + | otherwise = do + cs <- peekByteOff idx2SHA1s (n * 20) + Just <$> BS.packCStringLen (cs, 20) + +-- | Nth CRC +idxCRC :: IDX -> Int -> IO (Maybe Word32) +idxCRC IDX1{} _ = return Nothing +idxCRC IDX2{..} n + | n >= idx2Size = return Nothing + | otherwise = do + BE crc <- peekElemOff idx2CRCs n + return (Just crc) + +-- | Nth offset +idxOffset :: IDX -> Int -> IO (Maybe FileOffset) +idxOffset IDX1{..} n + | n >= idx1Size = return Nothing + | otherwise = do + BE off <- peekByteOff idx1Offsets (n * 24) + return . Just . fromIntegral $ (off :: Word32) +idxOffset IDX2{..} n + | n >= idx2Size = return Nothing + | otherwise = do + BE off <- peekElemOff idx2Offsets n + return . Just . fromIntegral $ off + +------------------------------------------------------------ + +-- | Generate the pathname for a given packfile +idxPath :: String -> IO FilePath +idxPath idx = gitPath ("objects" "pack" ("pack-" ++ idx ++ ".idx")) + +idxHeader :: Word32 +idxHeader = 0xff744f63 + +dumpRawPackIndex :: FilePath -> IO String +dumpRawPackIndex fp = do + (ptr, rawsize, offset, size) <- mmapFilePtr fp ReadOnly Nothing + let start :: Ptr (BigEndian Word32) + start = ptr `plusPtr` offset + BE hdr <- peek start + idx <- if (hdr == idxHeader) + then do + BE ver <- peekElemOff start 1 + case ver of + 2 -> mkIDX2 start size + _ -> error "Unknown version" + else mkIDX1 start size + return $ "Mapped region offset " ++ (show offset) ++ " size " ++ (show size) ++ " with " ++ show (idxSize idx) ++ " objects" + +mkIDX1 :: Ptr (BigEndian Word32) -> Int -> IO IDX +mkIDX1 start size = do + let fanout = start + BE n <- peekElemOff fanout 255 + let n' = fromIntegral (n :: Word32) + let offsets = fanout `plusPtr` (256 * 4) + return (IDX1 n' fanout offsets) + +mkIDX2 :: Ptr (BigEndian Word32) -> Int -> IO IDX +mkIDX2 start size = do + let fanout = start `plusPtr` (2 * 4) + BE n <- peekElemOff fanout 255 + let n' = fromIntegral (n :: Word32) + let sha1s = fanout `plusPtr` (256 * 4) + crcs = sha1s `plusPtr` (n' * 20) + offsets = crcs `plusPtr` (n' * 4) + offset64s = offsets `plusPtr` (n' * 4) + return (IDX2 n' fanout sha1s crcs offsets offset64s) + diff --git a/ght.cabal b/ght.cabal index 63ca49f..d7d81dc 100644 --- a/ght.cabal +++ b/ght.cabal @@ -22,10 +22,13 @@ library old-locale, time, iteratee, - iteratee-compress + iteratee-compress, + mmap, + storable-endian Exposed-Modules: Git.Blob Git.Commit Git.Pack + Git.PackIndex Git.Path ------------------------------------------------------------ diff --git a/tools/ght.hs b/tools/ght.hs index e266990..f8ae912 100644 --- a/tools/ght.hs +++ b/tools/ght.hs @@ -12,6 +12,7 @@ import UI.Command import Git.Blob import Git.Commit import Git.Pack +import Git.PackIndex import Git.Path -- show-prefix, show-root use these @@ -146,6 +147,29 @@ fPack (pack:_) = do then return pack else packPath pack +------------------------------------------------------------ +-- show-idx +-- + +ghtShowIdx = defCmd { + cmdName = "show-idx", + cmdHandler = ghtShowIdxHandler, + cmdCategory = "Blob management", + cmdShortDesc = "Show the raw dump of a pack index", + cmdExamples = [("Show raw contents of pack pack-abcd.idx", "abcd")] + } + +ghtShowIdxHandler = do + idx <- (liftIO . fIdx =<< appArgs) + x <- liftIO $ dumpRawPackIndex idx + liftIO $ putStrLn x + +fIdx (idx:_) = do + exists <- doesFileExist idx + if exists + then return idx + else idxPath idx + ------------------------------------------------------------ -- show-raw -- @@ -218,7 +242,7 @@ ght = def { appCategories = ["Reporting", "Blob management"], appSeeAlso = ["git"], appProject = "Ght", - appCmds = [ghtShowPrefix, ghtShowRoot, ghtShow, ghtLog, ghtShowRaw, ghtShowPack, ghtHashObject, ghtBranch] + appCmds = [ghtShowPrefix, ghtShowRoot, ghtShow, ghtLog, ghtShowRaw, ghtShowPack, ghtShowIdx, ghtHashObject, ghtBranch] } longDesc = "This is a bunch of trivial routines for inspecting git repositories. It is in no way useful beyond that."