Skip to content
This repository has been archived by the owner on Feb 1, 2019. It is now read-only.

Commit

Permalink
Merge pull request #158 from mattbasta/bug726994
Browse files Browse the repository at this point in the history
Removed DOCTYPE restrictions in install.rdf (bug726994)
  • Loading branch information
mattbasta committed Jun 28, 2012
2 parents c52742f + b6f2a36 commit 6d50635
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 68 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Expand Up @@ -2,7 +2,7 @@ coverage==3.3.1
cssutils==0.9.7
Fabric==0.9.0
nose==0.11.4
rdflib==3.0.0
rdflib==3.2.1
simplejson==2.3.0
argparse==1.1
-e git://github.com/mattbasta/fastchardet#egg=fastchardet
2 changes: 1 addition & 1 deletion tests/helper.py
Expand Up @@ -25,7 +25,7 @@ def _do_test(path, test, failure=True,
if require_install:
err.save_resource("has_install_rdf", True)
rdf_data = package.read("install.rdf")
install_rdf = RDFParser(rdf_data)
install_rdf = RDFParser(err, rdf_data)
err.save_resource("install_rdf", install_rdf)

populate_chrome_manifest(err, package)
Expand Down
1 change: 0 additions & 1 deletion tests/test_conduit.py
Expand Up @@ -3,7 +3,6 @@
import validator.testcases.conduit as conduit
from validator.errorbundler import ErrorBundle
from validator.xpi import XPIManager
from validator.rdf import RDFParser
from helper import _do_test
from validator.constants import *

Expand Down
2 changes: 1 addition & 1 deletion tests/test_installrdf.py
Expand Up @@ -96,7 +96,7 @@ def _run_test_raw(data, failure=True, detected_type=0, listed=True,
err.save_resource("listed", listed)
err.overrides = overrides

parser = RDFParser(data)
parser = RDFParser(err, data)
installrdf._test_rdf(err, parser)

print err.print_summary(verbose=True)
Expand Down
1 change: 0 additions & 1 deletion tests/test_libraryblacklist.py
Expand Up @@ -4,7 +4,6 @@
from validator.compat import FX9_DEFINITION
from validator.errorbundler import ErrorBundle
from validator.xpi import XPIManager
from validator.rdf import RDFParser


def test_blacklisted_files():
Expand Down
27 changes: 18 additions & 9 deletions tests/test_rdf.py
@@ -1,52 +1,60 @@
from StringIO import StringIO

from nose.tools import raises

import validator.rdf as rdf
from validator.rdf import RDFParser
from validator.rdf import RDFParser, RDFException


def testopen():
"""Tests that the RDF parser is capable of loading an RDF file
successfully."""

r = RDFParser(open("tests/resources/rdf/pass.rdf"))
r = RDFParser(None, open("tests/resources/rdf/pass.rdf"))
assert r.rdf


@raises(RDFException)
def test_load_bad():
"""Tests that the RDF parser throws an error for invalid, damaged,
or corrupt RDF files."""

r = RDFParser(open("tests/resources/rdf/fail.rdf"))
r = RDFParser(None, open("tests/resources/rdf/fail.rdf"))
assert not r.rdf


def test_load_rdf_stringio():
"""Tests that the RDF parser is capable of loading an RDF file
from a StringIO object successfully."""

sio = StringIO(open("tests/resources/rdf/pass.rdf").read())
r = RDFParser(sio)
r = RDFParser(None, StringIO(open("tests/resources/rdf/pass.rdf").read()))
assert r.rdf


def test_namespacing():
"""Tests that the RDF parser successfully creates namespaces."""

r = RDFParser(open("tests/resources/rdf/pass.rdf"), "foo")
r = RDFParser(None, open("tests/resources/rdf/pass.rdf"), "foo")

assert r.namespace == "foo"
assert str(r.uri("bar")) == "foo#bar"
assert str(r.uri("bar", "abc")) == "abc#bar"


def test_namespacing():
"""Tests that the RDF parser successfully creates namespaces."""

r = RDFParser(open("tests/resources/rdf/pass.rdf"), "foo")
r = RDFParser(None, open("tests/resources/rdf/pass.rdf"), "foo")

assert r.namespace == "foo"
assert str(r.uri("bar")) == "foo#bar"
assert str(r.uri("bar", "abc")) == "abc#bar"


def test_get_root_subject():
"Tests the integrity of the get_root_subject() function"

r = RDFParser(open("tests/resources/rdf/pass.rdf"))
r = RDFParser(None, open("tests/resources/rdf/pass.rdf"))
type_uri = r.uri("type")

emtype = r.get_object(None, type_uri)
Expand All @@ -55,11 +63,12 @@ def test_get_root_subject():
emtype = r.get_object(r.get_root_subject(), type_uri)
assert emtype is not None


def test_get_object():
""""Tests the integrity of the get_object() and get_objects()
functions."""

r = RDFParser(open("tests/resources/rdf/pass.rdf"))
r = RDFParser(None, open("tests/resources/rdf/pass.rdf"))
test_uri = r.uri("test")

emtest = r.get_object(None, test_uri)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_submain_install_rdf.py
Expand Up @@ -73,7 +73,7 @@ def test_doctype():
err = ErrorBundle()
xpi = MockXPIManager(
{"install.rdf": "tests/resources/installrdf/doctype.rdf"})
assert isinstance(submain._load_install_rdf(err, xpi, None), ErrorBundle)
submain._load_install_rdf(err, xpi, None)
assert err.failed()
assert not err.get_resource("has_install_rdf")
assert not err.get_resource("install_rdf")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_targetapplication.py
Expand Up @@ -13,7 +13,7 @@
def _do_test_raw(rdf, listed=True, overrides=None):
err = ErrorBundle(listed=listed)
err.overrides = overrides
rdf = RDFParser(rdf.strip())
rdf = RDFParser(err, rdf.strip())
err.save_resource("has_install_rdf", True)
err.save_resource("install_rdf", rdf)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_typedetection.py
Expand Up @@ -17,7 +17,7 @@ def _test_type(file_, expectation, failure=False):

# Load up the install.rdf into an RDFParser
install_file = package.read("install.rdf")
install_rdf = RDFParser(install_file)
install_rdf = RDFParser(err, install_file)

results = typedetection.detect_type(err, install_rdf, package)

Expand Down
108 changes: 91 additions & 17 deletions validator/rdf.py
@@ -1,34 +1,108 @@
import rdflib
import types
from rdflib import URIRef
from rdflib import Graph, URIRef
from rdflib.exceptions import ParserError
from StringIO import StringIO
from xml.sax import SAXParseException


class RDFParser(object):
"""This little gem (not to be confused with a Ruby gem) loads and
parses an RDF file."""
class RDFException(Exception):
    """Exception thrown when the RDF parser encounters a problem.

    An optional underlying exception can be attached as ``orig_exception``
    so that callers can recover the failing line number via ``line()``.
    """

    def __init__(self, message=None, orig_exception=None):
        """Create the exception.

        message -- Description of the problem. When omitted, it is derived
                   from `orig_exception` (if one is given).
        orig_exception -- The exception that triggered this one, or None.
        """
        if message is None and orig_exception is not None:
            # `BaseException.message` is deprecated (PEP 352) and does not
            # exist on Python 3, so only use it when it is actually there
            # and otherwise fall back to the exception's string form.
            message = getattr(orig_exception, "message", None)
            if message is None:
                message = str(orig_exception)

        super(RDFException, self).__init__(message)
        self.orig_exception = orig_exception

    def line(self):
        """Return the wrapped exception's line number, or None.

        None is returned when there is no wrapped exception or when the
        wrapped exception does not provide `getLineNumber()`.
        """
        get_line_number = getattr(self.orig_exception, "getLineNumber", None)
        return get_line_number() if callable(get_line_number) else None


class AddonRDFEntity(object):
    """
    Stand-in for an entity that has been "resolved" inside an add-on's RDF
    file. Instances are handed back to SAX during entity resolution.
    """

    def getSystemId(self):
        # The stand-in carries no system identifier.
        return ""

    def getByteStream(self):
        # Generator that produces a single `None` and then finishes.
        yield None


class AddonRDFEntityResolver(object):
"""
An entity resolver to be used by SAX for resolving internal entity
references.
"""

def __init__(self, data, namespace=None):
# Load up and parse the file in XML format.
graph = rdflib.Graph()
def __init__(self, err):
self.err = err

def resolveEntity(self, public, system):
    """Report an entity reference and return a stand-in entity.

    A warning is recorded on the error bundle according to the scheme of
    the entity's system identifier; the reference itself is never fetched.

    public -- Public identifier of the entity (unused).
    system -- System identifier (URI) of the entity.

    Returns a new `AddonRDFEntity`.
    """
    # The resolver may be created without an error bundle (`err` is None).
    # In that case there is nowhere to report to, so skip the warnings
    # instead of failing with an AttributeError in the middle of parsing.
    if self.err is not None:
        if system.startswith("data:"):
            self.err.warning(
                err_id=("rdf", "entity_resolver", "data_uri"),
                warning="`data:` URIs are not permitted in `install.rdf`.",
                filename="install.rdf")
        elif system.startswith("chrome://"):
            self.err.warning(
                err_id=("rdf", "entity_resolver", "chrome_uri"),
                warning="`chrome://` URI referenced before initialization.",
                description="A chrome URI was referenced before the "
                            "browser chrome was initialized.",
                filename="install.rdf")
        else:
            # Anything else is reported as a remote reference.
            self.err.warning(
                err_id=("rdf", "entity_resolver", "remote_uri"),
                warning="Remote URI referenced from `install.rdf`.",
                description="Remote URIs should not be used within "
                            "`install.rdf` files.",
                filename="install.rdf")

    return AddonRDFEntity()


class RDFParser(object):
"""Parser wrapper for RDF files."""

def __init__(self, err, data, namespace=None):
self.err = err
self.manifest = u"urn:mozilla:install-manifest"
self.namespace = "http://www.mozilla.org/2004/em-rdf"
if namespace is not None:
self.namespace = namespace
self.namespace = namespace or "http://www.mozilla.org/2004/em-rdf"

# Try it!
if isinstance(data, types.StringTypes):
data = StringIO(data) # Wrap data in a pseudo-file

from rdflib.plugins.parsers import rdfxml
orig_create_parser = rdfxml.create_parser

try:
# Patch rdflib to not resolve URL entities.
def create_parser(*args, **kwargs):
parser = orig_create_parser(*args, **kwargs)
parser.setEntityResolver(AddonRDFEntityResolver(err))
return parser
rdfxml.create_parser = create_parser

# Load up and parse the file in XML format.
graph = Graph()
graph.parse(data, format="xml")
except Exception as error:
self.rdf = None
return
else:
self.rdf = graph

except ParserError as ex:
# Re-raise the exception in a local exception type.
raise RDFException(message=ex.message)
except SAXParseException as ex:
# Raise the SAX parse exceptions so we get some line info.
raise RDFException(orig_exception=ex)
finally:
# If we fail, we don't want to sully up the creation function.
rdfxml.create_parser = orig_create_parser

def uri(self, element, namespace=None):
"Returns a URIRef object for use with the RDF document."

Expand All @@ -40,7 +114,7 @@ def uri(self, element, namespace=None):
def get_root_subject(self):
"Returns the BNode which describes the topmost subject of the graph."

manifest = rdflib.term.URIRef(self.manifest)
manifest = URIRef(self.manifest)

if list(self.rdf.triples((manifest, None, None))):
return manifest
Expand Down
62 changes: 28 additions & 34 deletions validator/submain.py
@@ -1,6 +1,5 @@
import logging
import os
import re
import signal
from zipfile import BadZipfile
from zlib import error as zlib_error
Expand All @@ -9,7 +8,7 @@
from validator.opensearch import detect_opensearch
from validator.webapp import detect_webapp
from validator.chromemanifest import ChromeManifest
from validator.rdf import RDFParser
from validator.rdf import RDFException, RDFParser
from validator.xpi import XPIManager
from validator import decorator

Expand Down Expand Up @@ -189,43 +188,38 @@ def test_package(err, file_, name, expectation=PACKAGE_ANY,


def _load_install_rdf(err, package, expectation):
# Load up the install.rdf file.
install_rdf_data = package.read("install.rdf")

if re.search('<!doctype', install_rdf_data, re.I):
err.save_resource("bad_install_rdf", True)
return err.error(("main",
"test_package",
"doctype_in_installrdf"),
"DOCTYPEs are not permitted in install.rdf",
"The add-on's install.rdf file contains a DOCTYPE. "
"It must be removed before your add-on can be "
"validated.",
filename="install.rdf")

install_rdf = RDFParser(install_rdf_data)

if install_rdf.rdf is None or not install_rdf:
return err.error(("main",
"test_package",
"cannot_parse_installrdf"),
"Cannot Parse install.rdf",
"The install.rdf file could not be parsed.",
filename="install.rdf")
try:
install_rdf = RDFParser(err, package.read("install.rdf"))
except RDFException as ex:
err.error(
err_id=("main", "test_package", "parse_error"),
error="Could not parse `install.rdf`.",
description="The RDF parser was unable to parse the "
"install.rdf file included with this add-on.",
filename="install.rdf",
line=ex.line())
return
else:
err.save_resource("has_install_rdf", True, pushable=True)
err.save_resource("install_rdf", install_rdf, pushable=True)
if install_rdf.rdf is None:
err.error(
err_id=("main", "test_package", "cannot_parse_installrdf"),
error="Cannot read `install.rdf`",
description="The install.rdf file could not be parsed.",
filename="install.rdf")
return
else:
err.save_resource("has_install_rdf", True, pushable=True)
err.save_resource("install_rdf", install_rdf, pushable=True)

# Load up the results of the type detection
results = detect_type(err, install_rdf, package)

if results is None:
return err.error(("main",
"test_package",
"undeterminable_type"),
"Unable to determine add-on type",
"The type detection algorithm could not determine "
"the type of the add-on.")
err.error(
err_id=("main", "test_package", "undeterminable_type"),
error="Unable to determine add-on type",
description="The type detection algorithm could not determine "
"the type of the add-on.")
return
else:
err.set_type(results)

Expand Down

0 comments on commit 6d50635

Please sign in to comment.