Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
add Git.PackIndex, ght show-idx
Low-level routines for parsing .idx files, version 1 and 2
  • Loading branch information
kfish committed May 10, 2011
1 parent 1db620d commit 120c2e1
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 2 deletions.
127 changes: 127 additions & 0 deletions Git/PackIndex.hs
@@ -0,0 +1,127 @@
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS -Wall #-}

module Git.PackIndex (
dumpRawPackIndex,

-- * Paths
idxPath
) where

import Control.Applicative ((<$>))
import Control.Monad (forM_)
import qualified Data.ByteString as BS
import Data.Word (Word32)
import Foreign.Ptr
import Foreign.Storable
import Data.Storable.Endian
import System.FilePath
import System.IO.MMap
import System.Posix.Types
import Text.Printf

import Git.Path

------------------------------------------------------------

data IDX = IDX1 {
idx1Size :: Int
, idx1Fanout :: Ptr (BigEndian Word32)
, idx1Offsets :: Ptr (BigEndian Word32)
} | IDX2 {
idx2Size :: Int
, idx2Fanout :: Ptr (BigEndian Word32)
, idx2SHA1s :: Ptr (BigEndian Word32)
, idx2CRCs :: Ptr (BigEndian Word32)
, idx2Offsets :: Ptr (BigEndian Word32)
, idx264bOffsets :: Ptr (BigEndian Word32)
-- , idx2PackCSum :: Ptr (BigEndian Word32)
-- , idx2IdxCSum :: Ptr (BigEndian Word32)
}

------------------------------------------------------------
-- | Public API

-- | Number of objects in the corresponding .pack file
idxSize :: IDX -> Int
idxSize IDX1{..} = idx1Size
idxSize IDX2{..} = idx2Size

-- | Nth SHA1
idxSha1 :: IDX -> Int -> IO (Maybe BS.ByteString)
idxSha1 IDX1{..} n
| n >= idx1Size = return Nothing
| otherwise = do
cs <- peekByteOff idx1Offsets (4 + (n * 24))
Just <$> BS.packCStringLen (cs, 20)
idxSha1 IDX2{..} n
| n >= idx2Size = return Nothing
| otherwise = do
cs <- peekByteOff idx2SHA1s (n * 20)
Just <$> BS.packCStringLen (cs, 20)

-- | Nth CRC
idxCRC :: IDX -> Int -> IO (Maybe Word32)
idxCRC IDX1{} _ = return Nothing
idxCRC IDX2{..} n
| n >= idx2Size = return Nothing
| otherwise = do
BE crc <- peekElemOff idx2CRCs n
return (Just crc)

-- | Nth offset
idxOffset :: IDX -> Int -> IO (Maybe FileOffset)
idxOffset IDX1{..} n
| n >= idx1Size = return Nothing
| otherwise = do
BE off <- peekByteOff idx1Offsets (n * 24)
return . Just . fromIntegral $ (off :: Word32)
idxOffset IDX2{..} n
| n >= idx2Size = return Nothing
| otherwise = do
BE off <- peekElemOff idx2Offsets n
return . Just . fromIntegral $ off

------------------------------------------------------------

-- | Generate the pathname for a given packfile
idxPath :: String -> IO FilePath
idxPath idx = gitPath ("objects" </> "pack" </> ("pack-" ++ idx ++ ".idx"))

idxHeader :: Word32
idxHeader = 0xff744f63

dumpRawPackIndex :: FilePath -> IO String
dumpRawPackIndex fp = do
(ptr, rawsize, offset, size) <- mmapFilePtr fp ReadOnly Nothing
let start :: Ptr (BigEndian Word32)
start = ptr `plusPtr` offset
BE hdr <- peek start
idx <- if (hdr == idxHeader)
then do
BE ver <- peekElemOff start 1
case ver of
2 -> mkIDX2 start size
_ -> error "Unknown version"
else mkIDX1 start size
return $ "Mapped region offset " ++ (show offset) ++ " size " ++ (show size) ++ " with " ++ show (idxSize idx) ++ " objects"

mkIDX1 :: Ptr (BigEndian Word32) -> Int -> IO IDX
mkIDX1 start size = do
let fanout = start
BE n <- peekElemOff fanout 255
let n' = fromIntegral (n :: Word32)
let offsets = fanout `plusPtr` (256 * 4)
return (IDX1 n' fanout offsets)

mkIDX2 :: Ptr (BigEndian Word32) -> Int -> IO IDX
mkIDX2 start size = do
let fanout = start `plusPtr` (2 * 4)
BE n <- peekElemOff fanout 255
let n' = fromIntegral (n :: Word32)
let sha1s = fanout `plusPtr` (256 * 4)
crcs = sha1s `plusPtr` (n' * 20)
offsets = crcs `plusPtr` (n' * 4)
offset64s = offsets `plusPtr` (n' * 4)
return (IDX2 n' fanout sha1s crcs offsets offset64s)

5 changes: 4 additions & 1 deletion ght.cabal
Expand Up @@ -22,10 +22,13 @@ library
old-locale,
time,
iteratee,
iteratee-compress
iteratee-compress,
mmap,
storable-endian
Exposed-Modules: Git.Blob
Git.Commit
Git.Pack
Git.PackIndex
Git.Path

------------------------------------------------------------
Expand Down
26 changes: 25 additions & 1 deletion tools/ght.hs
Expand Up @@ -12,6 +12,7 @@ import UI.Command
import Git.Blob
import Git.Commit
import Git.Pack
import Git.PackIndex
import Git.Path

-- show-prefix, show-root use these
Expand Down Expand Up @@ -146,6 +147,29 @@ fPack (pack:_) = do
then return pack
else packPath pack

------------------------------------------------------------
-- show-idx
--

ghtShowIdx = defCmd {
cmdName = "show-idx",
cmdHandler = ghtShowIdxHandler,
cmdCategory = "Blob management",
cmdShortDesc = "Show the raw dump of a pack index",
cmdExamples = [("Show raw contents of pack pack-abcd.idx", "abcd")]
}

ghtShowIdxHandler = do
idx <- (liftIO . fIdx =<< appArgs)
x <- liftIO $ dumpRawPackIndex idx
liftIO $ putStrLn x

fIdx (idx:_) = do
exists <- doesFileExist idx
if exists
then return idx
else idxPath idx

------------------------------------------------------------
-- show-raw
--
Expand Down Expand Up @@ -218,7 +242,7 @@ ght = def {
appCategories = ["Reporting", "Blob management"],
appSeeAlso = ["git"],
appProject = "Ght",
appCmds = [ghtShowPrefix, ghtShowRoot, ghtShow, ghtLog, ghtShowRaw, ghtShowPack, ghtHashObject, ghtBranch]
appCmds = [ghtShowPrefix, ghtShowRoot, ghtShow, ghtLog, ghtShowRaw, ghtShowPack, ghtShowIdx, ghtHashObject, ghtBranch]
}

longDesc = "This is a bunch of trivial routines for inspecting git repositories. It is in no way useful beyond that."
Expand Down

0 comments on commit 120c2e1

Please sign in to comment.