-
Notifications
You must be signed in to change notification settings - Fork 192
/
Create.hs
75 lines (70 loc) · 3.53 KB
/
Create.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
{-|
This script downloads the public suffix list from mozilla's website, and uses
Network.PublicSuffixList.Create.sink to construct an opaque data structure which can
be used with the isSuffix function in Network.PublicSuffixList.Lookup. It then
generates a source file with the contents of this data structure so that
applications can link against this source file and get knowledget of public suffixes
without doing anything at runtime.
-}
import qualified Data.ByteString as BS
import qualified Data.ByteString.UTF8 as U8
import qualified Data.Conduit as C
import qualified Data.Text as T
import Data.Time.Clock
import qualified Network.HTTP.Conduit as HC
import Data.Conduit.Binary (conduitFile)
import System.IO
import Network.PublicSuffixList.Create
import Network.PublicSuffixList.Types
import Network.PublicSuffixList.Serialize
generateDataStructure :: String -> IO (DataStructure, UTCTime)
generateDataStructure url = do
req <- HC.parseRequest url
out <- HC.withManager $ \ manager -> do
res <- HC.http req manager
HC.responseBody res C.$$+- conduitFile "effective_tld_names.dat" C.=$ sink
current_time <- getCurrentTime
putStrLn $ "Fetched Public Suffix List at " ++ show current_time
return (out, current_time)
main :: IO ()
main = do
(ds, current_time) <- generateDataStructure "http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1"
withFile "Network/PublicSuffixList/DataStructure.hs" WriteMode $ \ h -> do
hPutStrLn h "{-# LANGUAGE OverloadedStrings #-}"
hPutStrLn h "{-# LANGUAGE CPP #-}"
hPutStrLn h ""
hPutStrLn h $ "-- DO NOT MODIFY! This file has been automatically generated from the Create.hs script at " ++ show current_time
hPutStrLn h ""
hPutStrLn h "module Network.PublicSuffixList.DataStructure (dataStructure) where"
hPutStrLn h ""
hPutStrLn h "import Data.ByteString.Char8 ()"
hPutStrLn h ""
hPutStrLn h "import Network.PublicSuffixList.Types"
hPutStrLn h "#if !defined(RUNTIMELIST)"
hPutStrLn h "import qualified Data.ByteString as BS"
hPutStrLn h "import Network.PublicSuffixList.Serialize"
hPutStrLn h "#else"
hPutStrLn h "import qualified Network.PublicSuffixList.Create as PSLC"
hPutStrLn h "import qualified Data.Conduit as C"
hPutStrLn h "import Data.Conduit.Binary (sourceFile)"
hPutStrLn h "import System.IO.Unsafe (unsafePerformIO)"
hPutStrLn h "#endif"
hPutStrLn h ""
hPutStrLn h "-- We could just put the raw data structure here, but if we do that, there will be lots of"
hPutStrLn h "-- static string literals, which makes GHC really slow when compiling. Instead, we can manually"
hPutStrLn h "-- serialize the datastructure ourself, so there's only one string literal."
hPutStrLn h ""
hPutStrLn h "{-|"
hPutStrLn h $ "The opaque data structure that 'isSuffix' can query. This data structure was generated at " ++ show current_time
hPutStrLn h "-}"
hPutStrLn h "dataStructure :: DataStructure"
hPutStrLn h "#if defined(RUNTIMELIST)"
hPutStrLn h "{-# NOINLINE dataStructure #-}"
hPutStrLn h "dataStructure = unsafePerformIO $ C.runResourceT $ sourceFile RUNTIMELIST C.$$ PSLC.sink"
hPutStrLn h "#else"
hPutStrLn h "dataStructure = getDataStructure serializedDataStructure"
hPutStrLn h ""
hPutStrLn h "serializedDataStructure :: BS.ByteString"
hPutStrLn h $ "serializedDataStructure = " ++ (show $ putDataStructure ds)
hPutStrLn h ""
hPutStrLn h "#endif"