Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Initial commit. I intend to neaten it up, add comments and add .cabal…

… file
  • Loading branch information...
commit 1ad654586b72e9b19fbdcada6a52d0b97ab66446 0 parents
Rob Stewart authored
Showing with 254 additions and 0 deletions.
  1. +41 −0 README
  2. +213 −0 list_mappings.hs
41 README
@@ -0,0 +1,41 @@
+This hack is really just a programming excercise :-)
+
+What data: The List event 2012
+
+What I've done with it
+---
+- I've written a Haskell program that...
+- Transforms the XML into RDF
+- Defines a schema for The List specific fields
+- Reuses ontologies for field e.g. location (wg84pos)
+- Serializes the RDF into the Turtle file format
+
+In addition...
+- The program goes out to to the The List web API to grab more info
+about locations
+- Goes out to Linked Geo Data and discovers the nearby pubs to the
+venue (within 100 metres)
+
+
+Why?! have I done it?
+---
+- Linked Open Data is *really* awesome.
+- It'd great to have The List on the cloud of linked open data
+- ... on this map: http://richard.cyganiak.de/2007/10/lod/
+- (More exposure on the semantic web for The List events!!)
+- Venue location info is disambiguated, so all kinds of interesting
+information can be sought from the semantic web (finding pubs is just
+a proof-of-concept).
+
+Files
+---
+- list_mappings.hs <- the Haskell code to generate the turtle file
+- list.ttl <- the file generated
+- README <- this file
+
+
+What next?
+--
+- You can install a SPARQL endpoit.. point it at the .ttl file, and
+you have a *semantic search endpoint* for free out-of-the-box.
+- Get in touch with me.. I'd happily add more (robstewart57@gmail.com).
213 list_mappings.hs
@@ -0,0 +1,213 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+
+import qualified Data.Map as Map
+import qualified Data.ByteString.Lazy.Char8 as B
+import Text.XML.HaXml.Parse
+import Text.XML.HaXml.Posn
+import Text.XML.HaXml.Combinators
+import Text.XML.HaXml
+import Data.RDF
+import Data.RDF.TriplesGraph
+import Text.RDF.RDF4H.TurtleSerializer
+import Network.HTTP.Base
+import System.IO
+import Data.Maybe
+import Network.HTTP
+import Text.RDF.RDF4H.TurtleParser
+import Network.URI hiding (URI)
+import Data.List.Split
+
+
+main :: IO ()
+main = do
+ let fname = "../../data/live/chs2012-list-events.xml"
+ f <- readFile fname
+ let (Document _ _ root _) = xmlParse fname f
+ let rootElem = CElem root noPos
+ events = (tag "ives:IvesMessage" /> elm) rootElem
+ triples <- mapM eventTriples events
+ let prefixes = (PrefixMappings fringePrefixMappings)
+ (rdfGraph::TriplesGraph) = mkRdf (concat triples) Nothing prefixes
+ outh <- openFile "list.ttl" WriteMode
+ hWriteRdf (TurtleSerializer Nothing prefixes) outh rdfGraph
+ hClose outh
+
+eventTriples :: Content a -> IO Triples
+eventTriples event = do
+ let id = eventID event
+ subj = unode (s2b $ fringeResource ++ id)
+ ks = Map.keys mappings
+ topLevelTriples = concatMap (\k -> genTriple subj event k mappings ) ks
+ venTriple = venuTriple event
+ tagTriples = eventTagTriples event
+ putStrLn $ "Processing event: " ++ id
+ locationTriples <- processEventLocation event
+ return ([venTriple] ++ tagTriples ++ locationTriples ++ topLevelTriples)
+
+genTriple :: forall a. Subject -> Content a -> Node -> Map.Map Node (FilterLookup a) -> [Triple]
+genTriple subj event k mapping =
+ let obj = objectFromXml event mapping k
+ in case obj of
+ Nothing -> []
+ Just obj -> [triple subj k obj]
+
+
+data FilterLookup a = URI (CFilter a) | Literal (CFilter a) | TypedLiteral String (CFilter a)
+
+objectFromXml :: Content a -> Map.Map Node (FilterLookup a) -> Node -> Maybe Node
+objectFromXml event map k =
+ let (Just filterLookup) = Map.lookup k map
+ in case filterLookup of
+ (URI filt) ->
+ let s = extractTxt filt event
+ in case s of
+ "" -> Nothing
+ _ -> Just (unode $ s2b (fringeResource ++ urlEncode s))
+ (Literal filt) ->
+ let s = extractTxt filt event
+ in case s of
+ "" -> Nothing
+ _ -> Just (lnode (plainL $ s2b s))
+
+extractTxt :: CFilter a -> Content a -> String
+extractTxt xmlFilter event =
+ let c = xmlFilter event
+ in case length c of
+ 1 -> let (CString _ s _) = head c
+ in s
+ _ -> ""
+
+
+
+findNearbyPubs venue vid = do
+ let latFilter = tag "venue" /> tag "latitude" /> txt
+ longFilter = tag "venue" /> tag "longitude" /> txt
+ lat = extractTxt latFilter venue
+ long = extractTxt longFilter venue
+ query = "http://linkedgeodata.org/page/near/" ++ lat ++ "," ++ long ++ "/1000/class/Pub.n3"
+ rdf <- httpCallForRdf query
+ case rdf of
+ Left e -> error (show e)
+ Right g -> do
+ let subj = (unode . s2b) vid
+ pred = unode . s2b $ fringeProp ++ "neabyPub"
+ linkTriples = map (triple subj pred . subjectOf) (triplesOf g)
+ return (linkTriples ++ triplesOf g)
+
+
+-- |Takes a generated uri and makes simple HTTP request,
+-- asking for RDF N3 serialization. Returns either 'ParseFailure' or 'RDF'
+httpCallForRdf :: String -> IO (Either ParseFailure TriplesGraph)
+httpCallForRdf uri = do
+ let h1 = mkHeader HdrUserAgent "hsparql-client"
+ h2 = mkHeader HdrAccept "text/rdf+n3"
+ request = Request { rqURI = fromJust $ parseURI uri
+ , rqHeaders = [h1,h2]
+ , rqMethod = GET
+ , rqBody = ""
+ }
+ response <- simpleHTTP request >>= getResponseBody
+ return $ parseString (TurtleParser Nothing Nothing) (B.pack response)
+
+
+fringeResource = "http://www.edfringe.com/resource#"
+fringeProp = "http://www.edfringe.com/prop#"
+lodeProp = "http://linkedevents.org/ontology/"
+geonamesProp = "http://www.geonames.org/ontology#"
+wg84posProp = "http://www.w3.org/2003/01/geo/wgs84_pos#"
+linkedGeoData = "http://linkedgeodata.org/ontology/"
+
+thelist = "http://www.list.co.uk/onto/resource#"
+thelistProp = "http://www.list.co.uk/onto/prop#"
+
+fringePrefixMappings :: Map.Map B.ByteString B.ByteString
+fringePrefixMappings = Map.fromList $
+ [ (s2b "fringe", s2b fringeResource)
+ , (s2b "fringeprop", s2b fringeProp)
+ , (s2b "lode", s2b lodeProp)
+ , (s2b "gn", s2b geonamesProp)
+ , (s2b "wgs84_pos", s2b wg84posProp)
+ , (s2b "lgdo", s2b linkedGeoData)
+ ]
+
+mappings :: forall a. Map.Map Node (FilterLookup a)
+mappings = Map.fromList
+ [ (unode (s2b (lodeProp++"atTime")), Literal (tag "Event" /> tag "Schedule" /> tag "Performance" /> tag "StartDateTime" /> txt)) -- TODO typed literal as date
+ , (unode (s2b (thelist++"title")), Literal (tag "Event" /> tag "Title" /> tag "MainTitle" /> txt))
+ , (unode (s2b (thelist++"description")), Literal (tag "Event" /> tag "Description" /> tag "Plain" /> txt))
+ ]
+
+
+venuTriple :: forall t. Content t -> Triple
+venuTriple event =
+ let id = venuID event
+ s = unode (s2b (thelist ++ (eventID event)))
+ p = unode (s2b (lodeProp ++ "atPlace"))
+ o = unode (s2b id)
+ in triple s p o
+
+venuID :: forall t. Content t -> String
+venuID event =
+ let filter = tag "Event" /> tag "Venue"
+ (CElem (Elem _ attrs _) _) = head $ filter event
+ (_, vid) = attrs !! 1
+ in show vid
+
+eventID :: forall t. Content t -> String
+eventID event =
+ let filt = tag "Event"
+ (CElem (Elem _ attrs _) _) = head $ filt event
+ (_, eid) = attrs !! 0
+ in show eid
+
+eventTagTriples :: forall t. Content t -> [Triple]
+eventTagTriples event =
+ let filt = tag "Event" /> tag "Property" /> txt
+ propsS = extractTxt filt event
+ props = parseProperties propsS
+ in map (eventTagTriple event) props
+
+eventTagTriple :: forall t. Content t -> String -> Triple
+eventTagTriple event prop =
+ let s = (unode (s2b (thelist ++ eventID event)))
+ p = (unode (s2b (thelist ++ "tag")))
+ o = lnode (plainL $ s2b prop)
+ in triple s p o
+
+parseProperties :: String -> [String]
+parseProperties s = splitOn "," s
+
+processEventLocation :: forall t. Content t -> IO [Triple]
+processEventLocation event = do
+ let vid = venuID event
+ let uri = vid ++ "?format=xml"
+ request = replaceHeader HdrUserAgent "user-agent" (getRequest uri)
+ response <- simpleHTTP request >>= getResponseBody
+ let (Document _ _ root _) = xmlParse "" response
+ let rootElem = CElem root noPos
+ venue = tag "venue" rootElem
+ ks = Map.keys locationMappings
+ subj = unode (s2b vid)
+ triples = concatMap (\k -> genTriple subj (head venue) k locationMappings ) ks
+ return triples
+
+-- Uncomment these lines when LinkedGeoData API is working :-/
+-- pubTriples <- findNearbyPubs (head venue) vid
+-- return (pubTriples ++ triples)
+
+
+locationMappings :: forall a. Map.Map Node (FilterLookup a)
+locationMappings = Map.fromList
+ [ (unode (s2b (thelist++"venueName")), Literal (tag "venue" /> tag "venue_name" /> txt))
+ , (unode (s2b (thelist++"altName")), Literal (tag "venue" /> tag "alt_names" /> txt))
+ , (unode (s2b (thelist++"previousName")), Literal (tag "venue" /> tag "previous_names" /> txt))
+ , (unode (s2b (thelist++"address")), Literal (tag "venue" /> tag "address" /> txt))
+ , (unode (s2b (thelist++"town")), Literal (tag "venue" /> tag "town" /> txt))
+ , (unode (s2b (thelist++"region")), Literal (tag "venue" /> tag "region" /> txt))
+ , (unode (s2b (thelist++"countryCode")), Literal (tag "venue" /> tag "country_code" /> txt))
+ , (unode (s2b (thelist++"postalCode")), Literal (tag "venue" /> tag "postal_code" /> txt))
+ , (unode (s2b (wg84posProp++"lat")), Literal (tag "venue" /> tag "latitude" /> txt))
+ , (unode (s2b (wg84posProp++"long")), Literal (tag "venue" /> tag "longitude" /> txt))
+ , (unode (s2b (thelist++"radius")), Literal (tag "venue" /> tag "radius" /> txt))
+ , (unode (s2b (thelist++"vidType")), Literal (tag "venue" /> tag "vid_type" /> txt))
+ ]
Please sign in to comment.
Something went wrong with that request. Please try again.