Skip to content

Commit

Permalink
fixed some bugs found by functionalExamples
Browse files Browse the repository at this point in the history
  • Loading branch information
staffanm committed Jul 4, 2014
1 parent f3a86e8 commit 1b2ee1c
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 98 deletions.
3 changes: 2 additions & 1 deletion doc/examples/firststeps-api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

# firststeps-api.py
import sys
import os
import shutil
sys.path.append("doc/examples") # to find w3cstandards.py
sys.path.append(os.environ['FERENDA_HOME']+"/doc/examples") # to find w3cstandards.py

# begin download-status
from w3cstandards import W3CStandards
Expand Down
2 changes: 1 addition & 1 deletion ferenda/facet.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def year(cls, row, binding='dcterms_issued', resource_graph=None):
datestring = row[binding]
# assume a date(time) like '2014-06-05T12:00:00', '2014-06-05'
# or even '2014-06'
formatstring = {19: "%Y-%m-%dT%h:%m:%s",
formatstring = {19: "%Y-%m-%dT%H:%M:%S",
10: "%Y-%m-%d",
7: "%Y-%m"}[len(datestring)]
d = datetime.strptime(datestring, formatstring)
Expand Down
42 changes: 33 additions & 9 deletions ferenda/fulltextindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import math
import re
import shutil
import itertools
from datetime import date, datetime, MAXYEAR, MINYEAR

import six
Expand Down Expand Up @@ -371,7 +372,18 @@ class WhooshIndex(FulltextIndex):
def __init__(self, location, repos):
    """Create the index wrapper and note which fields can hold several values.

    :param location: passed unchanged to the superclass constructor.
    :param repos: iterable of repository objects; each one is asked for
                  ``make_graph()`` and ``facets()``. Also passed unchanged
                  to the superclass constructor.
    """
    self._writer = None
    super(WhooshIndex, self).__init__(location, repos)
    # Maps field name -> the facet's multiple_values flag, so that we
    # know which fields may contain multiple values.
    # FIXME: very similar to the code in make_schema
    self._multiple = {}
    for repo in repos:
        graph = repo.make_graph()  # only needed for qname lookup
        for facet in repo.facets():
            fieldname = facet.dimension_label
            if not fieldname:
                # No explicit label: derive the field name from the
                # qname of the facet's rdftype ("prefix:term" ->
                # "prefix_term").
                fieldname = graph.qname(facet.rdftype).replace(":", "_")
            self._multiple[fieldname] = facet.multiple_values

def exists(self):
return whoosh.index.exists_in(self.location)
Expand Down Expand Up @@ -402,15 +414,20 @@ def update(self, uri, repo, basefile, text, **kwargs):
if not self._writer:
self._writer = self.index.writer()

# special-handling of the Resource type -- this is provided as
# a dict with 'iri' and 'label' keys, and we flatten it to a
# 2-element list (stored in an IDLIST)
s = self.schema()
for key in kwargs:
# special-handling of the Resource type -- this is provided as
# a dict with 'iri' and 'label' keys, and we flatten it to a
# 2-element list (stored in an IDLIST)
if isinstance(s[key], Resource):
# if isinstance(kwargs[key], dict):
kwargs[key] = [kwargs[key]['iri'],
kwargs[key]['label']]
# might be multiple values, in which case we create an
# n-element list, still stored as IDLIST
if isinstance(kwargs[key], list):
# or if self._multiple[key]:
kwargs[key] = list(itertools.chain.from_iterable([(x['iri'], x['label'])for x in kwargs[key]]))
else:
kwargs[key] = [kwargs[key]['iri'],
kwargs[key]['label']]
elif isinstance(s[key], Datetime):
if (isinstance(kwargs[key], date) and
not isinstance(kwargs[key], datetime)):
Expand Down Expand Up @@ -523,8 +540,15 @@ def _convert_result(self, res):
# de-marshal Resource objects from list to dict
for key in resourcefields:
if key in fields:
fields[key] = {'iri': fields[key][0],
'label': fields[key][1]}
# need to return a list of dicts if
# multiple_values was specified, and a simple dict
# otherwise... (note that just examining if
# len(fields[key]) == 2 isn't enough)
if self._multiple[key]:
fields[key] = [{'iri': x[0], 'label': x[1]} for x in zip(fields[key][0::2], fields[key][1::2])]
else:
fields[key] = {'iri': fields[key][0],
'label': fields[key][1]}
l.append(fields)
return l

Expand Down
78 changes: 10 additions & 68 deletions ferenda/sources/tech/rfc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pyparsing import Word, CaselessLiteral, Optional, nums

from ferenda import DocumentRepository
from ferenda import TextReader, Describer, FSMParser, CitationParser, URIFormatter
from ferenda import TextReader, Describer, FSMParser, CitationParser, URIFormatter, Facet
from ferenda import util
from ferenda.decorators import action, recordlastdownload, managedparsing, downloadmax
from ferenda.elements import Body, Heading, Preformatted, Paragraph, UnorderedList, ListItem, Section, Subsection, Subsubsection, UnicodeElement, CompoundElement, Link, serialize
Expand Down Expand Up @@ -608,74 +608,16 @@ def parse_header(self, header, desc):
# personal author identity
desc.value(self.ns['dcterms'].rightsHolder, line)

# FIXME: Rewrite this using Facets
#
# def toc_predicates(self):
# return [self.ns['rdf'].type,
# self.ns['dcterms'].identifier,
# self.ns['dcterms'].title,
# self.ns['dcterms'].publisher,
# self.ns['dcterms'].issued,
# self.ns['dcterms'].subject]
#
# def toc_criteria(self, predicates=None):
# from ferenda import TocCriteria
# DCTERMS = self.ns['dcterms']
# RDF = self.ns['rdf']
# return [TocCriteria(binding='type',
# label='Sorted by document type',
# pagetitle='Documents of type %(select)s',
# selector=lambda x: x['type'],
# key=lambda x: x['type'],
# predicate=RDF.type),
#
# TocCriteria(binding='identifier',
# label='Sorted by RFC #',
# pagetitle='RFCs %(select)s--99',
# selector=lambda x: x['identifier'][4:-2] + "00", # "RFC 6998" => "69"
# key=lambda x: int(x['identifier'][4:]),
# selector_descending=True,
# key_descending=True,
# predicate=DCTERMS.identifier), # "RFC 6998" => 6998
#
# TocCriteria(binding='title',
# label='Sorted by title',
# pagetitle='Documents starting with "%(select)s"',
# # "The 'view-state'" property => "v"
# selector=lambda x: util.title_sortkey(x['title'])[0],
# key=lambda x: util.title_sortkey(x['title']),
# predicate=DCTERMS.title),
#
# TocCriteria(binding='publisher',
# label='Sorted by stream',
# pagetitle='Documents in the %(select)s stream',
# selector=lambda x: x['publisher'], # Must convert this URI to label (here or in the query)
# key=lambda x: x['publisher'],
# selector_descending=True,
# key_descending=True,
# predicate=DCTERMS.publisher),
#
# TocCriteria(binding='issued',
# label='Sorted by year',
# pagetitle='Documents published in %(select)s',
# selector=lambda x: x['issued'][:4], # '2013-08-01' => '2013'
# key=lambda x: x['issued'],
# selector_descending=True,
# key_descending=True,
# predicate=DCTERMS.issued),
#
# TocCriteria(binding='subject',
# label='Sorted by category',
# pagetitle='Documents in the %(select)s category',
# selector=lambda x: x['subject'],
# key=lambda x: int(x['identifier'][4:]),
# key_descending=True,
# predicate=DCTERMS.subject)
# ]

def facets(self):
    """Return one default Facet per predicate used by this repository.

    The predicates are rdf:type followed by the dcterms properties
    identifier, title, publisher, issued and subject, in that order.
    """
    rdf = self.ns['rdf']
    dcterms = self.ns['dcterms']
    predicates = [rdf.type,
                  dcterms.identifier,
                  dcterms.title,
                  dcterms.publisher,
                  dcterms.issued,
                  dcterms.subject]
    return [Facet(predicate) for predicate in predicates]
def toc_item(self, binding, row):
return [row['identifier'] + ": ",
Link(row['title'],
return [row['dcterms_identifier'] + ": ",
Link(row['dcterms_title'],
uri=row['uri'])]

def news_criteria(self):
Expand Down
26 changes: 18 additions & 8 deletions test/functionalDocExamples.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,14 @@ class Examples(unittest.TestCase, FerendaTestCase):

# FIXME: copied from testExamples.py -- unittest makes it a lot of
# work to inherit from other testcases
def _test_pyfile(self, pyfile, want=True, comparator=None):
def _test_pyfile(self, pyfile, workingdir=None, want=True, comparator=None):
if not workingdir:
workingdir = os.getcwd()
oldwd = os.getcwd()
pycode = compile(util.readfile(pyfile), pyfile, 'exec')
os.chdir(workingdir)
result = six.exec_(pycode, globals(), locals())
os.chdir(oldwd)
# the exec:ed code is expected to set return_value
got = locals()['return_value']
if not comparator:
Expand Down Expand Up @@ -186,7 +191,10 @@ def _test_shfile(self, shfile, workingdir=None, extraenv={}, check_output=True):
def test_firststeps_api(self):
from ferenda.manager import setup_logger; setup_logger('CRITICAL')
# FIXME: consider mocking print() here
self._test_pyfile("doc/examples/firststeps-api.py")
workingdir = tempfile.mkdtemp()
os.environ['FERENDA_HOME'] = os.getcwd()
self._test_pyfile("doc/examples/firststeps-api.py", workingdir)
shutil.rmtree(workingdir)

def test_firststeps(self):
# this test might fail whenever new W3C standards are added,
Expand All @@ -210,7 +218,9 @@ def test_firststeps(self):
# runserver and disable them)
def test_intro_example_py(self):
os.environ['FERENDA_DOWNLOADMAX'] = '3'
self._test_pyfile("doc/examples/intro-example.py")
workingdir = tempfile.mkdtemp()
self._test_pyfile("doc/examples/intro-example.py", workingdir)
shutil.rmtree(workingdir)

def test_intro_example_sh(self):
workingdir = tempfile.mkdtemp()
Expand All @@ -223,13 +233,13 @@ def test_intro_example_sh(self):
shutil.rmtree(workingdir)

def test_rfc(self):
workingdir = tempfile.mkdtemp()
try:
shutil.copy("doc/examples/rfc-annotations.rq", "rfc-annotations.rq")
shutil.copy("doc/examples/rfc.xsl", "rfc.xsl")
self._test_pyfile("doc/examples/rfcs.py")
shutil.copy("doc/examples/rfc-annotations.rq", workingdir+"/rfc-annotations.rq")
shutil.copy("doc/examples/rfc.xsl", workingdir+"/rfc.xsl")
self._test_pyfile("doc/examples/rfcs.py", workingdir)
finally:
os.unlink("rfc-annotations.rq")
os.unlink("rfc.xsl")
shutil.rmtree(workingdir)

def test_composite(self):
workingdir = tempfile.mkdtemp()
Expand Down
23 changes: 13 additions & 10 deletions test/integrationFulltextIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@
'dcterms_title':'Title of first document in first repo',
'dcterms_identifier':'R1 D1',
'dcterms_issued':datetime(2013,2,14,14,6), # important to use real datetime object, not string representation
'dcterms_publisher': {'iri': 'http://example.org/vocab/publ1',
'label': 'Publishing & sons'},
'dcterms_publisher': [{'iri': 'http://example.org/vocab/publ1',
'label': 'Publishing & sons'}],
'dc_subject': ['green', 'standards'],
'text': 'Long text here'},
{'repo':'repo1',
Expand All @@ -73,8 +73,10 @@
'dcterms_title':'Title of second document in first repo',
'dcterms_identifier':'R1 D2',
'dcterms_issued':datetime(2013,3,4,14,16),
'dcterms_publisher': {'iri': 'http://example.org/vocab/publ2',
'label': 'Bookprinters and associates'},
'dcterms_publisher': [{'iri': 'http://example.org/vocab/publ2',
'label': 'Bookprinters and associates'},
{'iri': 'http://example.org/vocab/publ3',
'label': 'Printers intl.'}],
'dc_subject': ['suggestions'],
'text': 'Even longer text here'},
{'repo':'repo2',
Expand All @@ -92,7 +94,7 @@
'dcterms_title':'Title of second document in second repo',
'dcterms_identifier':'R2 D2',
'ex_secret': True,
'dcterms_references': None,
'dcterms_references':'http://example.org/repo2/2',
'dc_subject':['yellow', 'red'],
'text': 'Even this one'}
]
Expand All @@ -117,7 +119,7 @@ def facets(self):
EX = self.ns['ex']
return [Facet(RDF.type),
Facet(DCTERMS.title),
Facet(DCTERMS.publisher),
Facet(DCTERMS.publisher, multiple_values=True),
Facet(DCTERMS.identifier),
Facet(DCTERMS.issued),
Facet(EX.secret, indexingtype=Boolean()),
Expand Down Expand Up @@ -245,18 +247,19 @@ def test_setup(self):

def test_insert(self):
self.index.update(**custom_dataset[0]) # repo1
self.index.update(**custom_dataset[2]) # repo2
self.index.update(**custom_dataset[3]) # repo2
self.index.commit()
self.assertEqual(self.index.doccount(),2)

res, pager = self.index.query(uri="http://example.org/repo1/1")
self.assertEqual(len(res), 1)
self.assertEqual(custom_dataset[0],res[0])

res, pager = self.index.query(uri="http://example.org/repo2/1")
res, pager = self.index.query(uri="http://example.org/repo2/2")
self.assertEqual(len(res), 1)
self.assertEqual(custom_dataset[2],res[0])

self.assertEqual(custom_dataset[3],res[0])



class CustomQuery(object):

Expand Down
15 changes: 14 additions & 1 deletion test/testDocRepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import doctest

from ferenda import DocumentEntry, TocPageset, TocPage, \
Describer, LayeredConfig, TripleStore, FulltextIndex
Describer, LayeredConfig, TripleStore, FulltextIndex, Facet
from ferenda.fulltextindex import WhooshIndex
from ferenda.errors import *

Expand Down Expand Up @@ -1752,6 +1752,19 @@ def test_facets(self):
facets = self.repo.facets()
self.assertEqual(facets[0].rdftype, rdflib.RDF.type)
# and more ...


def test_year(self):
    """Facet.year yields '2014' for each supported date layout and raises
    for strings it cannot interpret as a date."""
    # full datetime, plain date, and year-month forms
    parseable = ('2014-06-05T12:00:00',
                 '2014-06-05',
                 '2014-06')
    for datestring in parseable:
        self.assertEqual('2014',
                         Facet.year({'dcterms_issued': datestring}))

    # free text, and a correctly shaped string with impossible values
    unparseable = ('This is clearly an invalid date',
                   '2014-14-99')
    for datestring in unparseable:
        with self.assertRaises(Exception):
            Facet.year({'dcterms_issued': datestring})


class News(RepoTester):
Expand Down

0 comments on commit 1b2ee1c

Please sign in to comment.