fixed some bugs found by functionalExamples

staffanm · Jul 4, 2014 · 1b2ee1c · 1b2ee1c
1 parent f3a86e8
commit 1b2ee1c
Show file tree

Hide file tree

Showing 7 changed files with 91 additions and 98 deletions.
diff --git a/doc/examples/firststeps-api.py b/doc/examples/firststeps-api.py
@@ -3,8 +3,9 @@
 
 # firststeps-api.py
 import sys
+import os
 import shutil
-sys.path.append("doc/examples") # to find w3cstandards.py
+sys.path.append(os.environ['FERENDA_HOME']+"/doc/examples") # to find w3cstandards.py
 
 # begin download-status
 from w3cstandards import W3CStandards

diff --git a/ferenda/facet.py b/ferenda/facet.py
@@ -22,7 +22,7 @@ def year(cls, row, binding='dcterms_issued', resource_graph=None):
         datestring = row[binding]
         # assume a date(time) like '2014-06-05T12:00:00', '2014-06-05'
         # or even '2014-06'
-        formatstring = {19: "%Y-%m-%dT%h:%m:%s",
+        formatstring = {19: "%Y-%m-%dT%H:%M:%S",
                         10: "%Y-%m-%d",
                         7: "%Y-%m"}[len(datestring)]
         d = datetime.strptime(datestring, formatstring)

diff --git a/ferenda/fulltextindex.py b/ferenda/fulltextindex.py
@@ -5,6 +5,7 @@
 import math
 import re
 import shutil
+import itertools
 from datetime import date, datetime, MAXYEAR, MINYEAR
 
 import six
@@ -371,7 +372,18 @@ class WhooshIndex(FulltextIndex):
     def __init__(self, location, repos):
         self._writer = None
         super(WhooshIndex, self).__init__(location, repos)
-        # self._schema = self.get_default_schema()
+        self._multiple = {}
+        # Map each field name to its facet's multiple_values flag so we
+        # know which fields may contain multiple values. FIXME: this is
+        # very similar to the code in make_schema
+        for repo in repos:
+            g = repo.make_graph() # for qname lookup
+            for facet in repo.facets():
+                if facet.dimension_label:
+                    fld = facet.dimension_label
+                else:
+                    fld = g.qname(facet.rdftype).replace(":", "_")
+                self._multiple[fld] = facet.multiple_values
 
     def exists(self):
         return whoosh.index.exists_in(self.location)
@@ -402,15 +414,20 @@ def update(self, uri, repo, basefile, text, **kwargs):
         if not self._writer:
             self._writer = self.index.writer()
 
-        # special-handling of the Resource type -- this is provided as
-        # a dict with 'iri' and 'label' keys, and we flatten it to a
-        # 2-element list (stored in an IDLIST)
         s = self.schema()
         for key in kwargs:
+            # special-handling of the Resource type -- this is provided as
+            # a dict with 'iri' and 'label' keys, and we flatten it to a
+            # 2-element list (stored in an IDLIST)
             if isinstance(s[key], Resource):
-            # if isinstance(kwargs[key], dict):
-                kwargs[key] = [kwargs[key]['iri'],
-                               kwargs[key]['label']]
+                # might be multiple values, in which case we flatten them
+                # to a 2n-element list (iri, label, ...), still an IDLIST
+                if isinstance(kwargs[key], list):
+                # or if self._multiple[key]:
+                    kwargs[key] = list(itertools.chain.from_iterable([(x['iri'], x['label'])for x in kwargs[key]]))
+                else:
+                    kwargs[key] = [kwargs[key]['iri'],
+                                   kwargs[key]['label']]
             elif isinstance(s[key], Datetime):
                 if (isinstance(kwargs[key], date) and
                     not isinstance(kwargs[key], datetime)):
@@ -523,8 +540,15 @@ def _convert_result(self, res):
             # de-marschal Resource objects from list to dict
             for key in resourcefields:
                 if key in fields:
-                    fields[key] = {'iri': fields[key][0],
-                                   'label': fields[key][1]}
+                    # return a list of dicts if multiple_values was
+                    # specified for the field, and a single dict otherwise
+                    # (checking len(fields[key]) == 2 isn't enough: a
+                    # multi-valued field with one resource is also 2 long)
+                    if self._multiple[key]:
+                        fields[key] = [{'iri': x[0], 'label': x[1]} for x in zip(fields[key][0::2], fields[key][1::2])]
+                    else:
+                        fields[key] = {'iri': fields[key][0],
+                                       'label': fields[key][1]}
             l.append(fields)
         return l
 

diff --git a/ferenda/sources/tech/rfc.py b/ferenda/sources/tech/rfc.py
@@ -13,7 +13,7 @@
 from pyparsing import Word, CaselessLiteral, Optional, nums
 
 from ferenda import DocumentRepository
-from ferenda import TextReader, Describer, FSMParser, CitationParser, URIFormatter
+from ferenda import TextReader, Describer, FSMParser, CitationParser, URIFormatter, Facet
 from ferenda import util
 from ferenda.decorators import action, recordlastdownload, managedparsing, downloadmax
 from ferenda.elements import Body, Heading, Preformatted, Paragraph, UnorderedList, ListItem, Section, Subsection, Subsubsection, UnicodeElement, CompoundElement, Link, serialize
@@ -608,74 +608,16 @@ def parse_header(self, header, desc):
                 # personal author identity
                 desc.value(self.ns['dcterms'].rightsHolder, line)
 
-# FIXME: Rewrite this using Facets
-# 
-#    def toc_predicates(self):
-#        return [self.ns['rdf'].type,
-#                self.ns['dcterms'].identifier,
-#                self.ns['dcterms'].title,
-#                self.ns['dcterms'].publisher,
-#                self.ns['dcterms'].issued,
-#                self.ns['dcterms'].subject]
-#
-#    def toc_criteria(self, predicates=None):
-#        from ferenda import TocCriteria
-#        DCTERMS = self.ns['dcterms']
-#        RDF = self.ns['rdf']
-#        return [TocCriteria(binding='type',
-#                            label='Sorted by document type',
-#                            pagetitle='Documents of type %(select)s',
-#                            selector=lambda x: x['type'],
-#                            key=lambda x: x['type'],
-#                            predicate=RDF.type),
-#                
-#                TocCriteria(binding='identifier',
-#                            label='Sorted by RFC #',
-#                            pagetitle='RFCs %(select)s--99',
-#                            selector=lambda x: x['identifier'][4:-2] + "00",  # "RFC 6998" => "69"
-#                            key=lambda x: int(x['identifier'][4:]),
-#                            selector_descending=True,
-#                            key_descending=True,
-#                            predicate=DCTERMS.identifier),   # "RFC 6998" => 6998
-#
-#                TocCriteria(binding='title',
-#                            label='Sorted by title',
-#                            pagetitle='Documents starting with "%(select)s"',
-#                            # "The 'view-state'" property => "v"
-#                            selector=lambda x: util.title_sortkey(x['title'])[0],
-#                            key=lambda x: util.title_sortkey(x['title']),
-#                            predicate=DCTERMS.title),
-#
-#                TocCriteria(binding='publisher',
-#                            label='Sorted by stream',
-#                            pagetitle='Documents in the %(select)s stream',
-#                            selector=lambda x: x['publisher'],  # Must convert this URI to label (here or in the query)
-#                            key=lambda x: x['publisher'],
-#                            selector_descending=True,
-#                            key_descending=True,
-#                            predicate=DCTERMS.publisher),
-#
-#                TocCriteria(binding='issued',
-#                            label='Sorted by year',
-#                            pagetitle='Documents published in %(select)s',
-#                            selector=lambda x: x['issued'][:4],  # '2013-08-01' => '2013'
-#                            key=lambda x: x['issued'],
-#                            selector_descending=True,
-#                            key_descending=True,
-#                            predicate=DCTERMS.issued),
-#
-#                TocCriteria(binding='subject',
-#                            label='Sorted by category',
-#                            pagetitle='Documents in the %(select)s category',
-#                            selector=lambda x: x['subject'],
-#                            key=lambda x: int(x['identifier'][4:]),
-#                            key_descending=True,
-#                            predicate=DCTERMS.subject)
-#            ]
-
+    def facets(self):
+        return [Facet(self.ns['rdf'].type),
+                Facet(self.ns['dcterms'].identifier),
+                Facet(self.ns['dcterms'].title),
+                Facet(self.ns['dcterms'].publisher),
+                Facet(self.ns['dcterms'].issued),
+                Facet(self.ns['dcterms'].subject)]
     def toc_item(self, binding, row):
-        return [row['identifier'] + ": ",
-                Link(row['title'],
+        return [row['dcterms_identifier'] + ": ",
+                Link(row['dcterms_title'],
                      uri=row['uri'])]
 
     def news_criteria(self):

diff --git a/test/functionalDocExamples.py b/test/functionalDocExamples.py
@@ -32,9 +32,14 @@ class Examples(unittest.TestCase, FerendaTestCase):
 
     # FIXME: copied from testExamples.py -- unittest makes it a lot of
     # work to inherit from other testcases
-    def _test_pyfile(self, pyfile, want=True, comparator=None):
+    def _test_pyfile(self, pyfile, workingdir=None, want=True, comparator=None):
+        if not workingdir:
+            workingdir = os.getcwd()
+        oldwd = os.getcwd()
         pycode = compile(util.readfile(pyfile), pyfile, 'exec')
+        os.chdir(workingdir)
         result = six.exec_(pycode, globals(), locals())
+        os.chdir(oldwd)
         # the exec:ed code is expected to set return_value
         got = locals()['return_value']
         if not comparator:
@@ -186,7 +191,10 @@ def _test_shfile(self, shfile, workingdir=None, extraenv={}, check_output=True):
     def test_firststeps_api(self):
         from ferenda.manager import setup_logger; setup_logger('CRITICAL')
         # FIXME: consider mocking print() here
-        self._test_pyfile("doc/examples/firststeps-api.py")
+        workingdir = tempfile.mkdtemp()
+        os.environ['FERENDA_HOME'] = os.getcwd()
+        self._test_pyfile("doc/examples/firststeps-api.py", workingdir)
+        shutil.rmtree(workingdir)
 
     def test_firststeps(self):
         # this test might fail whenever new W3C standards are added,
@@ -210,7 +218,9 @@ def test_firststeps(self):
     # runserver and disable them)
     def test_intro_example_py(self):
         os.environ['FERENDA_DOWNLOADMAX'] = '3'
-        self._test_pyfile("doc/examples/intro-example.py")
+        workingdir = tempfile.mkdtemp()
+        self._test_pyfile("doc/examples/intro-example.py", workingdir)
+        shutil.rmtree(workingdir)
 
     def test_intro_example_sh(self):
         workingdir = tempfile.mkdtemp()
@@ -223,13 +233,13 @@ def test_intro_example_sh(self):
         shutil.rmtree(workingdir)
 
     def test_rfc(self):
+        workingdir = tempfile.mkdtemp()
         try:
-            shutil.copy("doc/examples/rfc-annotations.rq", "rfc-annotations.rq")
-            shutil.copy("doc/examples/rfc.xsl", "rfc.xsl")
-            self._test_pyfile("doc/examples/rfcs.py")
+            shutil.copy("doc/examples/rfc-annotations.rq", workingdir+"/rfc-annotations.rq")
+            shutil.copy("doc/examples/rfc.xsl", workingdir+"/rfc.xsl")
+            self._test_pyfile("doc/examples/rfcs.py", workingdir)
         finally:
-            os.unlink("rfc-annotations.rq")
-            os.unlink("rfc.xsl")            
+            shutil.rmtree(workingdir)
 
     def test_composite(self):
         workingdir = tempfile.mkdtemp()

diff --git a/test/integrationFulltextIndex.py b/test/integrationFulltextIndex.py
@@ -63,8 +63,8 @@
      'dcterms_title':'Title of first document in first repo',
      'dcterms_identifier':'R1 D1',
      'dcterms_issued':datetime(2013,2,14,14,6), # important to use real datetime object, not string representation
-     'dcterms_publisher': {'iri': 'http://example.org/vocab/publ1',
-                   'label': 'Publishing & sons'},
+     'dcterms_publisher': [{'iri': 'http://example.org/vocab/publ1',
+                   'label': 'Publishing & sons'}],
      'dc_subject': ['green', 'standards'],
      'text': 'Long text here'},
     {'repo':'repo1',
@@ -73,8 +73,10 @@
      'dcterms_title':'Title of second document in first repo',
      'dcterms_identifier':'R1 D2',
      'dcterms_issued':datetime(2013,3,4,14,16),
-     'dcterms_publisher': {'iri': 'http://example.org/vocab/publ2',
-                   'label': 'Bookprinters and associates'},
+     'dcterms_publisher': [{'iri': 'http://example.org/vocab/publ2',
+                            'label': 'Bookprinters and associates'},
+                           {'iri': 'http://example.org/vocab/publ3',
+                            'label': 'Printers intl.'}],
      'dc_subject': ['suggestions'],
      'text': 'Even longer text here'},
     {'repo':'repo2',
@@ -92,7 +94,7 @@
      'dcterms_title':'Title of second document in second repo',
      'dcterms_identifier':'R2 D2',
      'ex_secret': True,
-     'dcterms_references': None,
+     'dcterms_references':'http://example.org/repo2/2',
      'dc_subject':['yellow', 'red'],
      'text': 'Even this one'}
     ]
@@ -117,7 +119,7 @@ def facets(self):
         EX = self.ns['ex']
         return [Facet(RDF.type),           
                 Facet(DCTERMS.title),      
-                Facet(DCTERMS.publisher),
+                Facet(DCTERMS.publisher, multiple_values=True),
                 Facet(DCTERMS.identifier),
                 Facet(DCTERMS.issued),
                 Facet(EX.secret, indexingtype=Boolean()),
@@ -245,18 +247,19 @@ def test_setup(self):
 
     def test_insert(self):
         self.index.update(**custom_dataset[0]) # repo1
-        self.index.update(**custom_dataset[2]) # repo2
+        self.index.update(**custom_dataset[3]) # repo2
         self.index.commit()
         self.assertEqual(self.index.doccount(),2)
 
         res, pager = self.index.query(uri="http://example.org/repo1/1")
         self.assertEqual(len(res), 1)
         self.assertEqual(custom_dataset[0],res[0])
 
-        res, pager = self.index.query(uri="http://example.org/repo2/1")
+        res, pager = self.index.query(uri="http://example.org/repo2/2")
         self.assertEqual(len(res), 1)
-        self.assertEqual(custom_dataset[2],res[0])
-
+        self.assertEqual(custom_dataset[3],res[0])
+
+
 
 class CustomQuery(object):        
 

diff --git a/test/testDocRepo.py b/test/testDocRepo.py
@@ -31,7 +31,7 @@
 import doctest
 
 from ferenda import DocumentEntry, TocPageset, TocPage, \
-    Describer, LayeredConfig, TripleStore, FulltextIndex
+    Describer, LayeredConfig, TripleStore, FulltextIndex, Facet
 from ferenda.fulltextindex import WhooshIndex
 from ferenda.errors import *
 
@@ -1752,6 +1752,19 @@ def test_facets(self):
         facets = self.repo.facets()
         self.assertEqual(facets[0].rdftype, rdflib.RDF.type)
         # and more ...
+
+
+    def test_year(self):
+        self.assertEqual('2014',
+                         Facet.year({'dcterms_issued': '2014-06-05T12:00:00'}))
+        self.assertEqual('2014',
+                         Facet.year({'dcterms_issued': '2014-06-05'}))
+        self.assertEqual('2014',
+                         Facet.year({'dcterms_issued': '2014-06'}))
+        with self.assertRaises(Exception):
+            Facet.year({'dcterms_issued': 'This is clearly an invalid date'})
+        with self.assertRaises(Exception):
+            Facet.year({'dcterms_issued': '2014-14-99'})
 
 
 class News(RepoTester):