From e1b4878d683f7da994405015358e92e963928c21 Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Tue, 2 Aug 2016 01:39:15 -0700 Subject: [PATCH] Add new version tracking implementation #53 --- ChangeLog.md | 12 ++ package.yaml | 5 +- src/Data/Store/TH/Internal.hs | 2 +- src/Data/Store/TypeHash/Internal.hs | 4 + src/Data/Store/Version.hs | 279 ++++++++++++++++++++++++++++ stack.yaml | 2 +- store.cabal | 21 ++- 7 files changed, 318 insertions(+), 7 deletions(-) create mode 100644 src/Data/Store/Version.hs diff --git a/ChangeLog.md b/ChangeLog.md index 5d446a5..3e4230d 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,5 +1,17 @@ # ChangeLog +## 0.2.1.0 + +Release notes: + +* Adds `Data.Store.Version` and deprecates `Data.Store.TypeHash`. + The new functionality is similar to TypeHash, but there are much fewer false + positives of hashes changing. + +Other enhancements: + +* Now exports types related to generics + ## 0.2.0.0 Release notes: diff --git a/package.yaml b/package.yaml index 855fe2b..9d9ef4d 100644 --- a/package.yaml +++ b/package.yaml @@ -29,7 +29,7 @@ dependencies: - store-core >=0.2 && <0.3 # Utilities package tightly coupled to the TH code - - th-utilities >=0.1.1.0 + - th-utilities >=0.2 # Due to removal of 'internal' from MonadPrim in 0.6 - primitive >=0.6 @@ -42,12 +42,15 @@ dependencies: # Added to appease at least the lower part of the PvP - array >=0.5.0.0 - base-orphans >=0.4.3 + - base64-bytestring >= 0.1.1 - bytestring >=0.10.4.0 - conduit >=1.2.3.1 - containers >=0.5.5.1 - cryptohash >=0.11.6 - deepseq >=1.3.0.2 + - directory >= 1.2 - fail >=4.9.0.0 + - filepath >= 1.3 - ghc-prim >=0.3.1.0 - hashable >=1.2.3.1 - hspec >=2.1.2 diff --git a/src/Data/Store/TH/Internal.hs b/src/Data/Store/TH/Internal.hs index 2547f10..e9e513f 100644 --- a/src/Data/Store/TH/Internal.hs +++ b/src/Data/Store/TH/Internal.hs @@ -45,7 +45,7 @@ import Language.Haskell.TH.Syntax (lift) import Prelude import Safe (headMay) import TH.Derive (Deriver(..)) -import TH.ReifyDataType +import TH.ReifySimple import TH.Utilities (expectTyCon1, dequalify, plainInstanceD) instance Deriver (Store a) where diff --git a/src/Data/Store/TypeHash/Internal.hs b/src/Data/Store/TypeHash/Internal.hs index 0ba70b4..6ad7fed 100644 --- a/src/Data/Store/TypeHash/Internal.hs +++ b/src/Data/Store/TypeHash/Internal.hs @@ -30,6 +30,10 @@ import Language.Haskell.TH.ReifyMany (reifyMany) import Language.Haskell.TH.Syntax (Lift(lift)) import Prelude +{-# DEPRECATED mkManyHasTypeHash, mkHasTypeHash + "Use of Data.Store.TypeHash isn't recommended, as the hashes are too unstable for most uses. Please instead consider using Data.Store.Version. See https://github.com/fpco/store/issues/53" + #-} + newtype Tagged a = Tagged { unTagged :: a } deriving (Eq, Ord, Show, Data, Typeable, Generic) diff --git a/src/Data/Store/Version.hs b/src/Data/Store/Version.hs new file mode 100644 index 0000000..4e8b4ca --- /dev/null +++ b/src/Data/Store/Version.hs @@ -0,0 +1,279 @@ +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskell #-} +-- | This module provides utilities which help ensure that we aren't +-- attempting to de-serialize data that is an older or newer version. +-- The 'WithVersion' utility wraps up a datatype along with a version +-- tag. This version tag can either be provided by the user +-- ('namedVersionConfig'), or use a computed hash +-- ('hashedVersionConfig'). +-- +-- The magic here is using an SYB traversal ('Data') to get the +-- structure of all the data-types involved. This info is rendered to +-- text and hashed to yield a hash which describes it. +module Data.Store.Version + ( StoreVersion(..) + , WithVersion(..) + , VersionConfig(..) + , hashedVersionConfig + , namedVersionConfig + , wrapVersion + , checkVersion + ) where + +import Control.Exception +import Control.Monad +import Control.Monad.Trans.State +import qualified Crypto.Hash.SHA1 as SHA1 +import qualified Data.ByteString as BS +import qualified Data.ByteString.Base64.URL as B64Url +import qualified Data.ByteString.Char8 as BS8 +import Data.Generics hiding (DataType, Generic) +import qualified Data.Map as M +import qualified Data.Set as S +import Data.Store.Internal +import qualified Data.Text as T +import Data.Text.Encoding (encodeUtf8, decodeUtf8, decodeUtf8With) +import Data.Text.Encoding.Error (lenientDecode) +import qualified Data.Text.IO as T +import Data.Typeable.Internal (TypeRep(..)) +import GHC.Generics (Generic) +import Language.Haskell.TH +import System.Directory +import System.Environment +import System.FilePath +import TH.RelativePaths +import TH.Utilities + +newtype StoreVersion = StoreVersion { unStoreVersion :: BS.ByteString } + deriving (Eq, Show, Ord, Data, Typeable, Generic, Store) + +data WithVersion a = WithVersion a StoreVersion + deriving (Eq, Show, Ord, Data, Typeable, Generic) + +instance Store a => Store (WithVersion a) + +-- | Configuration for the version checking of a particular type. +data VersionConfig a = VersionConfig + { vcExpectedHash :: Maybe String + -- ^ When set, specifies the hash which is expected to be computed. + , vcManualName :: Maybe String + -- ^ When set, specifies the name to instead use to tag the data. + , vcIgnore :: S.Set String + -- ^ DataTypes to ignore. + } deriving (Eq, Show, Data, Typeable, Generic) + +hashedVersionConfig :: String -> VersionConfig a +hashedVersionConfig hash = VersionConfig + { vcExpectedHash = Just hash + , vcManualName = Nothing + , vcIgnore = S.empty + } + +namedVersionConfig :: String -> String -> VersionConfig a +namedVersionConfig name hash = VersionConfig + { vcExpectedHash = Just hash + , vcManualName = Just name + , vcIgnore = S.empty + } + +wrapVersion :: Data a => VersionConfig a -> Q Exp +wrapVersion = impl Wrap + +checkVersion :: Data a => VersionConfig a -> Q Exp +checkVersion = impl Check + +data WhichFunc = Wrap | Check + +impl :: forall a. Data a => WhichFunc -> VersionConfig a -> Q Exp +impl wf vc = do + let proxy = Proxy :: Proxy a + info = encodeUtf8 (T.pack (getStructureInfo (vcIgnore vc) proxy)) + hash = SHA1.hash info + hashb64 = BS8.unpack (B64Url.encode hash) + version = case vcManualName vc of + Nothing -> [e| StoreVersion hash |] + Just name -> [e| StoreVersion name |] + case vcExpectedHash vc of + Nothing -> return () + Just expectedHash -> do + let shownType = showsQualTypeRep 0 (typeRep proxy) "" + -- FIXME: sanitize expected and handle null + path <- storeVersionedPath expectedHash + if hashb64 == expectedHash + then writeVersionInfo path shownType info + else do + newPath <- storeVersionedPath hashb64 + writeVersionInfo newPath shownType info + exists <- runIO $ doesFileExist path + extraMsg <- if not exists + then return ", but no file found with previously stored structural info." + else return (", use something like the following to compare with the old structural info:\n\n" ++ + "diff -u " ++ show path ++ " " ++ show newPath) + error $ + "\nData.Store.Version computed hash " ++ show hashb64 ++ + ", but expected hash " ++ show expectedHash ++ " is specified.\n" ++ + "The data used to construct the hash has been written to " ++ show newPath ++ + extraMsg ++ "\n" + case wf of + Wrap -> [e| (\x -> (x :: $(typeRepToType (typeRep proxy))) `WithVersion` $(version)) |] + Check -> [e| (\(WithVersion x gotVersion) -> + if gotVersion /= $(version) + then Left (VersionCheckException + { expectedVersion = $(version) + , receivedVersion = gotVersion + }) + else Right x) |] + +{- + txtWithComments <- runIO $ T.readFile path + let txt = T.unlines $ dropWhile ("--" `T.isPrefixOf`) $ T.lines txtWithComments + storedHash = BS8.unpack (B64Url.encode (SHA1.hash (encodeUtf8 txt))) + if storedHash == expectedHash + then return (", compare with the structural info that matches the hash, found in " ++ show path) + else return (", but the old file found also doesn't match the hash.") +-} + +writeVersionInfo :: FilePath -> String -> BS.ByteString -> Q () +writeVersionInfo path shownType info = runIO $ do + createDirectoryIfMissing True (takeDirectory path) + T.writeFile path $ T.unlines $ + [ T.pack ("-- Structural info for type " ++ shownType) + , "-- Generated by an invocation of functions in Data.Store.Version" + ] ++ T.lines (decodeUtf8 info) + +storeVersionedPath :: String -> Q FilePath +storeVersionedPath filename = do + mstack <- runIO (lookupEnv "STACK_EXE") + let dirName = case mstack of + Just _ -> ".stack-work" + Nothing -> "dist" + pathRelativeToCabalPackage (dirName "store-versioned" filename) + +-- Implementation details + +data S = S + { sResults :: M.Map String String + , sCurResult :: String + , sFieldNames :: [String] + } + +getStructureInfo :: forall a. Data a => S.Set String -> Proxy a -> String +getStructureInfo ignore = renderResults . sResults . flip execState (S M.empty "" []) . getStructureInfo' ignore + where + renderResults = unlines . map (\(k, v) -> k ++ v) . M.toAscList + +getStructureInfo' :: forall a. Data a => S.Set String -> Proxy a -> State S () +getStructureInfo' ignore _ = do + s0 <- get + when (M.notMember label (sResults s0)) $ + if S.member shownType ignore + then setResult " ignored\n" + else case dataTypeRep (dataTypeOf (undefined :: a)) of + AlgRep cs -> do + setResult "" + mapM_ goConstr (zip (True : repeat False) cs) + result <- gets sCurResult + setResult (if null cs then result ++ "\n" else result) + IntRep -> setResult " has IntRep\n" + FloatRep -> setResult " has FloatRep\n" + CharRep -> setResult " has CharRep\n" + NoRep + | S.member shownType ignore -> setResult " has NoRep\n" + | otherwise -> error $ + "\nNoRep in Data.Store.Version for " ++ show shownType ++ + ".\nIn the future it will be possible to statically " ++ + "declare a global serialization version for this type. " ++ + "\nUntil then you will need to use 'vcIgnore', and " ++ + "understand that serialization changes for affected types " ++ + "will not be detected.\n" + where + setResult x = + modify (\s -> S + { sResults = M.insert label x (sResults s) + , sCurResult = "" + , sFieldNames = [] + }) + label = "data-type " ++ shownType + shownType = showsQualTypeRep 0 (typeRep (Proxy :: Proxy a)) "" + goConstr :: (Bool, Constr) -> State S () + goConstr (isFirst, c) = do + modify (\s -> s + { sFieldNames = constrFields c ++ map (\ix -> "slot " ++ show (ix :: Int)) [0..] + , sCurResult = sCurResult s ++ (if isFirst then "\n = " else " | ") ++ showConstr c ++ " {\n" + }) + void (fromConstrM goField c :: State S a) + modify (\s -> s { sCurResult = sCurResult s ++ " }\n" }) + goField :: forall b. Data b => State S b + goField = do + s <- get + case sFieldNames s of + [] -> fail "impossible case in getStructureInfo'" + (name:names) -> do + getStructureInfo' ignore (Proxy :: Proxy b) + s' <- get + put s + { sResults = sResults s' + , sCurResult = sCurResult s ++ " " ++ name ++ " :: " ++ showsQualTypeRep 0 (typeRep (Proxy :: Proxy b)) "\n" + , sFieldNames = names + } + return (error "unexpected evaluation") + +showsQualTypeRep :: Int -> TypeRep -> ShowS +showsQualTypeRep p (TypeRep _ tycon kinds tys) = + case tys of + [] -> showsQualTyCon tycon + [x] | tycon == tcList -> showChar '[' . showsQualTypeRep 0 x . showChar ']' + where + [a,r] | tycon == tcFun -> showParen (p > 8) $ + showsQualTypeRep 9 a . + showString " -> " . + showsQualTypeRep 8 r + xs | isTupleTyCon tycon -> showTuple xs + | otherwise -> + showParen (p > 9) $ + showsQualTyCon tycon . + showChar ' ' . + showArgs (showChar ' ') (kinds ++ tys) + +showsQualTyCon :: TyCon -> ShowS +showsQualTyCon tc = showString (tyConModule tc ++ "." ++ tyConName tc) + +isTupleTyCon :: TyCon -> Bool +isTupleTyCon tc + | ('(':',':_) <- tyConName tc = True + | otherwise = False + +showArgs :: ShowS -> [TypeRep] -> ShowS +showArgs _ [] = id +showArgs _ [a] = showsQualTypeRep 10 a +showArgs sep (a:as) = showsQualTypeRep 10 a . sep . showArgs sep as + +showTuple :: [TypeRep] -> ShowS +showTuple args = showChar '(' + . showArgs (showChar ',') args + . showChar ')' + +tcList :: TyCon +tcList = tyConOf (Proxy :: Proxy [()]) + +tcFun :: TyCon +tcFun = tyConOf (Proxy :: Proxy (Int -> Int)) + +tyConOf :: Typeable a => Proxy a -> TyCon +tyConOf = typeRepTyCon . typeRep + +data VersionCheckException = VersionCheckException + { expectedVersion :: StoreVersion + , receivedVersion :: StoreVersion + } deriving (Typeable, Show) + +instance Exception VersionCheckException where + displayException VersionCheckException{..} = + "Mismatch detected by Data.Store.Version - expected " ++ + T.unpack (decodeUtf8With lenientDecode (unStoreVersion expectedVersion)) ++ " but got " ++ + T.unpack (decodeUtf8With lenientDecode (unStoreVersion receivedVersion)) diff --git a/stack.yaml b/stack.yaml index ef1f742..ba4ac92 100644 --- a/stack.yaml +++ b/stack.yaml @@ -6,5 +6,5 @@ packages: extra-dep: true extra-deps: - th-lift-instances-0.1.7 -- th-utilities-0.1.1.0 +- th-utilities-0.2.0.0 - th-reify-many-0.1.6 diff --git a/store.cabal b/store.cabal index 515b75b..047dfdb 100644 --- a/store.cabal +++ b/store.cabal @@ -42,23 +42,27 @@ library Data.Store.TH.Internal Data.Store.TypeHash Data.Store.TypeHash.Internal + Data.Store.Version System.IO.ByteBuffer other-modules: Data.Store.Impl build-depends: base >=4.7 && <5 , store-core >=0.2 && <0.3 - , th-utilities >=0.1.1.0 + , th-utilities >=0.2 , primitive >=0.6 , th-reify-many >=0.1.6 , array >=0.5.0.0 , base-orphans >=0.4.3 + , base64-bytestring >= 0.1.1 , bytestring >=0.10.4.0 , conduit >=1.2.3.1 , containers >=0.5.5.1 , cryptohash >=0.11.6 , deepseq >=1.3.0.2 + , directory >= 1.2 , fail >=4.9.0.0 + , filepath >= 1.3 , ghc-prim >=0.3.1.0 , hashable >=1.2.3.1 , hspec >=2.1.2 @@ -96,17 +100,20 @@ test-suite store-test build-depends: base >=4.7 && <5 , store-core >=0.2 && <0.3 - , th-utilities >=0.1.1.0 + , th-utilities >=0.2 , primitive >=0.6 , th-reify-many >=0.1.6 , array >=0.5.0.0 , base-orphans >=0.4.3 + , base64-bytestring >= 0.1.1 , bytestring >=0.10.4.0 , conduit >=1.2.3.1 , containers >=0.5.5.1 , cryptohash >=0.11.6 , deepseq >=1.3.0.2 + , directory >= 1.2 , fail >=4.9.0.0 + , filepath >= 1.3 , ghc-prim >=0.3.1.0 , hashable >=1.2.3.1 , hspec >=2.1.2 @@ -147,17 +154,20 @@ test-suite store-weigh build-depends: base >=4.7 && <5 , store-core >=0.2 && <0.3 - , th-utilities >=0.1.1.0 + , th-utilities >=0.2 , primitive >=0.6 , th-reify-many >=0.1.6 , array >=0.5.0.0 , base-orphans >=0.4.3 + , base64-bytestring >= 0.1.1 , bytestring >=0.10.4.0 , conduit >=1.2.3.1 , containers >=0.5.5.1 , cryptohash >=0.11.6 , deepseq >=1.3.0.2 + , directory >= 1.2 , fail >=4.9.0.0 + , filepath >= 1.3 , ghc-prim >=0.3.1.0 , hashable >=1.2.3.1 , hspec >=2.1.2 @@ -204,17 +214,20 @@ benchmark store-bench build-depends: base >=4.7 && <5 , store-core >=0.2 && <0.3 - , th-utilities >=0.1.1.0 + , th-utilities >=0.2 , primitive >=0.6 , th-reify-many >=0.1.6 , array >=0.5.0.0 , base-orphans >=0.4.3 + , base64-bytestring >= 0.1.1 , bytestring >=0.10.4.0 , conduit >=1.2.3.1 , containers >=0.5.5.1 , cryptohash >=0.11.6 , deepseq >=1.3.0.2 + , directory >= 1.2 , fail >=4.9.0.0 + , filepath >= 1.3 , ghc-prim >=0.3.1.0 , hashable >=1.2.3.1 , hspec >=2.1.2