Skip to content
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
Cannot retrieve contributors at this time
Copyright (C) - 2017-2021 Róman Joost <>
This file is part of gtfsschedule.
gtfsschedule is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
gtfsschedule is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with gtfsschedule. If not, see <>.
-- | This module provides functions to download, unpack and import the static
-- schedule from the provided CSV files.
-- GTFS traffic data comes with a static set of CSV files, apart from the realtime
-- data providing changes to the static schedule.
-- See also:
module CSV.Import (createNewDatabase, runImport) where
import qualified CSV.Import.Calendar as CSVCalendar
import qualified CSV.Import.Route as CSVRoute
import qualified CSV.Import.Stop as CSVStop
import qualified CSV.Import.StopTime as CSVStopTime
import qualified CSV.Import.Trip as CSVTrip
import qualified Codec.Archive.Zip as Zip
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.Reader (ReaderT)
import Control.Monad.Trans.Resource (MonadResource, runResourceT)
import qualified Data.ByteString.Lazy as B
import Data.Conduit (sealConduitT, ($$+-))
import Data.Conduit.Binary (sinkFile)
import Data.Csv (FromNamedRecord)
import Data.Csv.Streaming (decodeByName)
import Data.Foldable (mapM_)
import qualified Data.Text as T
import Database.Esqueleto (PersistValue (..))
import qualified Database.Persist.Sqlite as Sqlite
import qualified Filesystem.Path.CurrentOS as Path
import qualified GTFS.Database as DB
import Network.HTTP.Client (parseRequest)
import Network.HTTP.Client.Conduit (defaultManagerSettings)
import Network.HTTP.Conduit (http, newManager, responseBody)
import System.Directory (createDirectoryIfMissing, renameFile)
import System.IO (hPutStr, stderr)
import System.IO.Temp (withSystemTempDirectory)
import Prelude hiding (mapM_)
datasetZipFilename :: String
datasetZipFilename = ""
-- | Creates a new database by fetching a GTFS static dataset from the given URL
-- The sequence of actions is as follows:
-- * Download the static dataset as a zip file
-- * extract all CSV files
-- * create a new, temporary database in which we import necessary CSV data
-- * rename current database with the newly created one
-- For each step a progress is reported to stderr
createNewDatabase ::
-- | URL to the static dataset
String ->
-- | Path to the users database file (if it exists)
FilePath ->
IO ()
createNewDatabase url currentDBFile = withSystemTempDirectory "NewGTFSDB" $ \x -> do
let newDBFile = currentDBFile ++ ".new"
ensureUserDatabaseDir (Path.fromText $ T.pack currentDBFile)
_ <- printProgress 2 ""
downloadStaticDataset url x >>= printProgress 3 >>= unzipDataset >>= printProgress 5 >>= runImport newDBFile
_ <- printProgress 8 currentDBFile >>= renameFile newDBFile >> printProgress 10 ""
return ()
-- | shows a progress bar to indicate overall import progress
-- Kudos to:
printProgress :: Int -> a -> IO a
printProgress progress x = do
putProgress $ drawProgressBar 50 (fromIntegral progress / 10)
return x
putProgress :: String -> IO ()
putProgress s = hPutStr stderr $ "\r\ESC[K" ++ s
drawProgressBar :: Int -> Rational -> String
drawProgressBar width progress = "[" ++ replicate dots '.' ++ replicate spaces ' ' ++ "]"
dots = round (progress * fromIntegral width)
spaces = width - dots
-- | Prepares the user database directory if it doesn't exist
ensureUserDatabaseDir ::
Path.FilePath ->
IO ()
ensureUserDatabaseDir userdbfile =
case Path.toText $ userdbfile of
Right dbdir -> createDirectoryIfMissing True (T.unpack dbdir)
Left path -> ioError $ userError ("Path has invalid encoding: " ++ T.unpack path)
-- | Downloads new data set to systems temp directory
-- TODO: file is assumed to be a zip file
downloadStaticDataset ::
String ->
FilePath ->
IO FilePath
downloadStaticDataset url downloadDir = runResourceT $ do
manager <- liftIO $ newManager defaultManagerSettings
request <- liftIO $ parseRequest url
response <- http request manager
sealConduitT (responseBody response) $$+- sinkFile downloadfp
return downloadDir
downloadfp = concat [downloadDir, "/", datasetZipFilename]
unzipDataset ::
FilePath ->
IO FilePath
unzipDataset downloaddir = do
contents <- B.readFile zipfile
Zip.extractFilesFromArchive [Zip.OptDestination downloaddir] (Zip.toArchive contents)
return downloaddir
zipfile = concat [downloaddir, "/", datasetZipFilename]
-- | runs the import against the given database
-- CSV file names have to conform with Googles GTFS reference, e.g. (routes.txt, instead of routes.csv)
runImport ::
-- | path to new SQLite database file
FilePath ->
-- | directory in which CSV files are found
FilePath ->
IO ()
runImport newDBFile downloaddir = DB.runDBWithoutLogging (T.pack newDBFile) $ do
_ <- Sqlite.runMigrationSilent DB.migrateAll
DB.prepareDatabaseForUpdate DB.Started
importCSV (absolutePath downloaddir "routes.txt", CSVRoute.prepareSQL, CSVRoute.convertToValues)
importCSV (absolutePath downloaddir "stops.txt", CSVStop.prepareSQL, CSVStop.convertToValues)
importCSV (absolutePath downloaddir "trips.txt", CSVTrip.prepareSQL, CSVTrip.convertToValues)
importCSV (absolutePath downloaddir "calendar.txt", CSVCalendar.prepareSQL, CSVCalendar.convertToValues)
importCSV (absolutePath downloaddir "stop_times.txt", CSVStopTime.prepareSQL, CSVStopTime.convertToValues)
DB.prepareDatabaseForUpdate DB.Finished
absolutePath path file = concat [path, "/", file]
importCSV ::
(FromNamedRecord a, MonadIO m, MonadResource m) =>
(String, T.Text, a -> [PersistValue]) ->
ReaderT Sqlite.SqlBackend m ()
importCSV (filepath, sql, convertfunc) = do
contents <- liftIO $ B.readFile filepath
case decodeByName contents of
Left errmsg -> liftIO $ print errmsg
Right (_, records) -> do
stmt <- DB.prepareStmt sql
mapM_ (DB.rawInsert stmt . convertfunc) records
liftIO $ Sqlite.stmtFinalize stmt
return ()