Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit. I intend to neaten it up, add comments and add .cabal…
… file
- Loading branch information
Rob Stewart
committed
Apr 28, 2012
0 parents
commit 1ad6545
Showing
2 changed files
with
254 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
This hack is really just a programming excercise :-) | ||
|
||
What data: The List event 2012 | ||
|
||
What I've done with it | ||
--- | ||
- I've written a Haskell program that... | ||
- Transforms the XML into RDF | ||
- Defines a schema for The List specific fields | ||
- Reuses ontologies for field e.g. location (wg84pos) | ||
- Serializes the RDF into the Turtle file format | ||
|
||
In addition... | ||
- The program goes out to to the The List web API to grab more info | ||
about locations | ||
- Goes out to Linked Geo Data and discovers the nearby pubs to the | ||
venue (within 100 metres) | ||
|
||
|
||
Why?! have I done it? | ||
--- | ||
- Linked Open Data is *really* awesome. | ||
- It'd great to have The List on the cloud of linked open data | ||
- ... on this map: http://richard.cyganiak.de/2007/10/lod/ | ||
- (More exposure on the semantic web for The List events!!) | ||
- Venue location info is disambiguated, so all kinds of interesting | ||
information can be sought from the semantic web (finding pubs is just | ||
a proof-of-concept). | ||
|
||
Files | ||
--- | ||
- list_mappings.hs <- the Haskell code to generate the turtle file | ||
- list.ttl <- the file generated | ||
- README <- this file | ||
|
||
|
||
What next? | ||
-- | ||
- You can install a SPARQL endpoit.. point it at the .ttl file, and | ||
you have a *semantic search endpoint* for free out-of-the-box. | ||
- Get in touch with me.. I'd happily add more (robstewart57@gmail.com). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
|
||
import qualified Data.Map as Map | ||
import qualified Data.ByteString.Lazy.Char8 as B | ||
import Text.XML.HaXml.Parse | ||
import Text.XML.HaXml.Posn | ||
import Text.XML.HaXml.Combinators | ||
import Text.XML.HaXml | ||
import Data.RDF | ||
import Data.RDF.TriplesGraph | ||
import Text.RDF.RDF4H.TurtleSerializer | ||
import Network.HTTP.Base | ||
import System.IO | ||
import Data.Maybe | ||
import Network.HTTP | ||
import Text.RDF.RDF4H.TurtleParser | ||
import Network.URI hiding (URI) | ||
import Data.List.Split | ||
|
||
|
||
main :: IO () | ||
main = do | ||
let fname = "../../data/live/chs2012-list-events.xml" | ||
f <- readFile fname | ||
let (Document _ _ root _) = xmlParse fname f | ||
let rootElem = CElem root noPos | ||
events = (tag "ives:IvesMessage" /> elm) rootElem | ||
triples <- mapM eventTriples events | ||
let prefixes = (PrefixMappings fringePrefixMappings) | ||
(rdfGraph::TriplesGraph) = mkRdf (concat triples) Nothing prefixes | ||
outh <- openFile "list.ttl" WriteMode | ||
hWriteRdf (TurtleSerializer Nothing prefixes) outh rdfGraph | ||
hClose outh | ||
|
||
eventTriples :: Content a -> IO Triples | ||
eventTriples event = do | ||
let id = eventID event | ||
subj = unode (s2b $ fringeResource ++ id) | ||
ks = Map.keys mappings | ||
topLevelTriples = concatMap (\k -> genTriple subj event k mappings ) ks | ||
venTriple = venuTriple event | ||
tagTriples = eventTagTriples event | ||
putStrLn $ "Processing event: " ++ id | ||
locationTriples <- processEventLocation event | ||
return ([venTriple] ++ tagTriples ++ locationTriples ++ topLevelTriples) | ||
|
||
genTriple :: forall a. Subject -> Content a -> Node -> Map.Map Node (FilterLookup a) -> [Triple] | ||
genTriple subj event k mapping = | ||
let obj = objectFromXml event mapping k | ||
in case obj of | ||
Nothing -> [] | ||
Just obj -> [triple subj k obj] | ||
|
||
|
||
data FilterLookup a = URI (CFilter a) | Literal (CFilter a) | TypedLiteral String (CFilter a) | ||
|
||
objectFromXml :: Content a -> Map.Map Node (FilterLookup a) -> Node -> Maybe Node | ||
objectFromXml event map k = | ||
let (Just filterLookup) = Map.lookup k map | ||
in case filterLookup of | ||
(URI filt) -> | ||
let s = extractTxt filt event | ||
in case s of | ||
"" -> Nothing | ||
_ -> Just (unode $ s2b (fringeResource ++ urlEncode s)) | ||
(Literal filt) -> | ||
let s = extractTxt filt event | ||
in case s of | ||
"" -> Nothing | ||
_ -> Just (lnode (plainL $ s2b s)) | ||
|
||
extractTxt :: CFilter a -> Content a -> String | ||
extractTxt xmlFilter event = | ||
let c = xmlFilter event | ||
in case length c of | ||
1 -> let (CString _ s _) = head c | ||
in s | ||
_ -> "" | ||
|
||
|
||
|
||
findNearbyPubs venue vid = do | ||
let latFilter = tag "venue" /> tag "latitude" /> txt | ||
longFilter = tag "venue" /> tag "longitude" /> txt | ||
lat = extractTxt latFilter venue | ||
long = extractTxt longFilter venue | ||
query = "http://linkedgeodata.org/page/near/" ++ lat ++ "," ++ long ++ "/1000/class/Pub.n3" | ||
rdf <- httpCallForRdf query | ||
case rdf of | ||
Left e -> error (show e) | ||
Right g -> do | ||
let subj = (unode . s2b) vid | ||
pred = unode . s2b $ fringeProp ++ "neabyPub" | ||
linkTriples = map (triple subj pred . subjectOf) (triplesOf g) | ||
return (linkTriples ++ triplesOf g) | ||
|
||
|
||
-- |Takes a generated uri and makes simple HTTP request, | ||
-- asking for RDF N3 serialization. Returns either 'ParseFailure' or 'RDF' | ||
httpCallForRdf :: String -> IO (Either ParseFailure TriplesGraph) | ||
httpCallForRdf uri = do | ||
let h1 = mkHeader HdrUserAgent "hsparql-client" | ||
h2 = mkHeader HdrAccept "text/rdf+n3" | ||
request = Request { rqURI = fromJust $ parseURI uri | ||
, rqHeaders = [h1,h2] | ||
, rqMethod = GET | ||
, rqBody = "" | ||
} | ||
response <- simpleHTTP request >>= getResponseBody | ||
return $ parseString (TurtleParser Nothing Nothing) (B.pack response) | ||
|
||
|
||
fringeResource = "http://www.edfringe.com/resource#" | ||
fringeProp = "http://www.edfringe.com/prop#" | ||
lodeProp = "http://linkedevents.org/ontology/" | ||
geonamesProp = "http://www.geonames.org/ontology#" | ||
wg84posProp = "http://www.w3.org/2003/01/geo/wgs84_pos#" | ||
linkedGeoData = "http://linkedgeodata.org/ontology/" | ||
|
||
thelist = "http://www.list.co.uk/onto/resource#" | ||
thelistProp = "http://www.list.co.uk/onto/prop#" | ||
|
||
fringePrefixMappings :: Map.Map B.ByteString B.ByteString | ||
fringePrefixMappings = Map.fromList $ | ||
[ (s2b "fringe", s2b fringeResource) | ||
, (s2b "fringeprop", s2b fringeProp) | ||
, (s2b "lode", s2b lodeProp) | ||
, (s2b "gn", s2b geonamesProp) | ||
, (s2b "wgs84_pos", s2b wg84posProp) | ||
, (s2b "lgdo", s2b linkedGeoData) | ||
] | ||
|
||
mappings :: forall a. Map.Map Node (FilterLookup a) | ||
mappings = Map.fromList | ||
[ (unode (s2b (lodeProp++"atTime")), Literal (tag "Event" /> tag "Schedule" /> tag "Performance" /> tag "StartDateTime" /> txt)) -- TODO typed literal as date | ||
, (unode (s2b (thelist++"title")), Literal (tag "Event" /> tag "Title" /> tag "MainTitle" /> txt)) | ||
, (unode (s2b (thelist++"description")), Literal (tag "Event" /> tag "Description" /> tag "Plain" /> txt)) | ||
] | ||
|
||
|
||
venuTriple :: forall t. Content t -> Triple | ||
venuTriple event = | ||
let id = venuID event | ||
s = unode (s2b (thelist ++ (eventID event))) | ||
p = unode (s2b (lodeProp ++ "atPlace")) | ||
o = unode (s2b id) | ||
in triple s p o | ||
|
||
venuID :: forall t. Content t -> String | ||
venuID event = | ||
let filter = tag "Event" /> tag "Venue" | ||
(CElem (Elem _ attrs _) _) = head $ filter event | ||
(_, vid) = attrs !! 1 | ||
in show vid | ||
|
||
eventID :: forall t. Content t -> String | ||
eventID event = | ||
let filt = tag "Event" | ||
(CElem (Elem _ attrs _) _) = head $ filt event | ||
(_, eid) = attrs !! 0 | ||
in show eid | ||
|
||
eventTagTriples :: forall t. Content t -> [Triple] | ||
eventTagTriples event = | ||
let filt = tag "Event" /> tag "Property" /> txt | ||
propsS = extractTxt filt event | ||
props = parseProperties propsS | ||
in map (eventTagTriple event) props | ||
|
||
eventTagTriple :: forall t. Content t -> String -> Triple | ||
eventTagTriple event prop = | ||
let s = (unode (s2b (thelist ++ eventID event))) | ||
p = (unode (s2b (thelist ++ "tag"))) | ||
o = lnode (plainL $ s2b prop) | ||
in triple s p o | ||
|
||
parseProperties :: String -> [String] | ||
parseProperties s = splitOn "," s | ||
|
||
processEventLocation :: forall t. Content t -> IO [Triple] | ||
processEventLocation event = do | ||
let vid = venuID event | ||
let uri = vid ++ "?format=xml" | ||
request = replaceHeader HdrUserAgent "user-agent" (getRequest uri) | ||
response <- simpleHTTP request >>= getResponseBody | ||
let (Document _ _ root _) = xmlParse "" response | ||
let rootElem = CElem root noPos | ||
venue = tag "venue" rootElem | ||
ks = Map.keys locationMappings | ||
subj = unode (s2b vid) | ||
triples = concatMap (\k -> genTriple subj (head venue) k locationMappings ) ks | ||
return triples | ||
|
||
-- Uncomment these lines when LinkedGeoData API is working :-/ | ||
-- pubTriples <- findNearbyPubs (head venue) vid | ||
-- return (pubTriples ++ triples) | ||
|
||
|
||
locationMappings :: forall a. Map.Map Node (FilterLookup a) | ||
locationMappings = Map.fromList | ||
[ (unode (s2b (thelist++"venueName")), Literal (tag "venue" /> tag "venue_name" /> txt)) | ||
, (unode (s2b (thelist++"altName")), Literal (tag "venue" /> tag "alt_names" /> txt)) | ||
, (unode (s2b (thelist++"previousName")), Literal (tag "venue" /> tag "previous_names" /> txt)) | ||
, (unode (s2b (thelist++"address")), Literal (tag "venue" /> tag "address" /> txt)) | ||
, (unode (s2b (thelist++"town")), Literal (tag "venue" /> tag "town" /> txt)) | ||
, (unode (s2b (thelist++"region")), Literal (tag "venue" /> tag "region" /> txt)) | ||
, (unode (s2b (thelist++"countryCode")), Literal (tag "venue" /> tag "country_code" /> txt)) | ||
, (unode (s2b (thelist++"postalCode")), Literal (tag "venue" /> tag "postal_code" /> txt)) | ||
, (unode (s2b (wg84posProp++"lat")), Literal (tag "venue" /> tag "latitude" /> txt)) | ||
, (unode (s2b (wg84posProp++"long")), Literal (tag "venue" /> tag "longitude" /> txt)) | ||
, (unode (s2b (thelist++"radius")), Literal (tag "venue" /> tag "radius" /> txt)) | ||
, (unode (s2b (thelist++"vidType")), Literal (tag "venue" /> tag "vid_type" /> txt)) | ||
] |