Permalink
Browse files

preparing for a 0.1 release

  • Loading branch information...
1 parent 29bed68 commit e2d9bbba5fb86d025b1174f9059a9ab7f0ab6cb5 @peterbe committed Jan 14, 2013
Showing with 108 additions and 26 deletions.
  1. +2 −0 .gitignore
  2. +9 −0 docs/api.rst
  3. +12 −0 docs/changelog.rst
  4. +1 −0 docs/index.rst
  5. +1 −0 mincss/__init__.py
  6. +20 −24 mincss/processor.py
  7. +61 −0 setup.py
  8. +2 −2 tests/test_mincss.py
View
@@ -1,3 +1,5 @@
output/
proxy/.cache/
docs/_build/
+mincss.egg-info/
+/build/
View
@@ -18,6 +18,15 @@ API
Once all URLs have been processed the CSS is analyzed.
+ * ``process_url(url)``
+ Given a specific URL it will download it and parse the HTML. This
+ method will download the HTML and then call ``process_html()``.
+
+ * ``process_html(html, url)``
+ If you for some reason already have the HTML you can jump straight
+ to this method. Note, you still need to provide the URL where you
+ got the HTML from so it can use that to download any external CSS.
+
The ``Processor`` instance will make two attributes available
* ``instance.inlines``
View
@@ -0,0 +1,12 @@
+.. index:: changelog
+
+.. _changelog-chapter:
+
+Changelog
+=========
+
+
+v0.1 (2013-01-14)
+-----------------
+
+Initial release.
View
@@ -26,6 +26,7 @@ is `available on Github <https://github.com/peterbe/mincss>`_.
gettingstarted
features
api
+ changelog
Indices and tables
View
@@ -0,0 +1 @@
+__version__ = '0.1'
View
@@ -7,7 +7,7 @@
from lxml.cssselect import CSSSelector, SelectorSyntaxError, ExpressionError
import urllib
-#RE_HAS_MEDIA = re.compile("@media")
+
RE_FIND_MEDIA = re.compile("(@media.+?)(\{)", re.DOTALL | re.MULTILINE)
RE_NESTS = re.compile('@(-|keyframes).*?({)', re.DOTALL | re.M)
@@ -18,7 +18,12 @@
class ParserError(Exception):
- """happens we fail to parse the HTML"""
+ """happens when we fail to parse the HTML"""
+ pass
+
+
+class DownloadError(Exception):
+    """happens when we fail to download the URL"""
pass
@@ -45,18 +50,18 @@ def _download(self, url):
try:
response = urllib.urlopen(url)
if response.getcode() is not None:
- assert (
- response.getcode() == 200,
- '%s -- %s ' % (url, response.getcode())
- )
+ if response.getcode() != 200:
+ raise DownloadError(
+ '%s -- %s ' % (url, response.getcode())
+ )
html = response.read()
return unicode(html, 'utf-8')
except IOError:
raise IOError(url)
def process(self, *urls):
for url in urls:
- self._process_url(url)
+ self.process_url(url)
for identifier in sorted(self.blocks.keys()):
content = self.blocks[identifier]
@@ -78,29 +83,21 @@ def process(self, *urls):
self.links.append(
LinkResult(
href,
- url,
+ #url,
content,
processed
)
)
- def _process_url(self, url):
+ def process_url(self, url):
html = self._download(url)
+ self.process_html(html.strip(), url=url)
+
+ def process_html(self, html, url):
parser = etree.HTMLParser()
- stripped = html.strip()
- tree = etree.fromstring(stripped, parser).getroottree()
+ tree = etree.fromstring(html, parser).getroottree()
page = tree.getroot()
- #print repr(stripped[:100])
- # lxml inserts a doctype if none exists, so only include it in
- # the root if it was in the original html.
- #print repr(tree.docinfo.doctype)
- #if stripped.startswith(tree.docinfo.doctype):
- # root = tree
- #else:
- # root = page
- #root = tree if stripped.startswith(tree.docinfo.doctype) else page
-
if page is None:
print repr(html)
raise ParserError("Could not parse the html")
@@ -135,7 +132,6 @@ def _process_content(self, content, bodies):
def commentmatcher(match):
whole = match.group()
# are we in a block or outside
- #p = content.find(match.group())
nearest_close = content[:match.start()].rfind('}')
nearest_open = content[:match.start()].rfind('{')
next_close = content[match.end():].find('}')
@@ -355,7 +351,7 @@ def __init__(self, line, url, *args):
class LinkResult(_Result):
- def __init__(self, href, url, *args):
+ def __init__(self, href, *args):
self.href = href
- self.url = url
+ #self.url = url
super(LinkResult, self).__init__(*args)
View
@@ -0,0 +1,61 @@
+import codecs
+import os
+import re
+
+
+# Prevent spurious errors during `python setup.py test`, a la
+# http://www.eby-sarna.com/pipermail/peak/2010-May/003357.html:
+try:
+ import multiprocessing
+except ImportError:
+ pass
+
+from setuptools import setup, find_packages
+
+
+def read(*parts):
+ return codecs.open(os.path.join(os.path.dirname(__file__), *parts)).read()
+
+
+def find_version(*file_paths):
+ version_file = read(*file_paths)
+ version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
+ version_file, re.M)
+ if version_match:
+ return version_match.group(1)
+ raise RuntimeError("Unable to find version string.")
+
+def find_install_requires():
+ return [x.strip() for x in
+ read('requirements.txt').splitlines()
+ if x.strip() and not x.startswith('#')]
+
+
+setup(
+ name='mincss',
+ version=find_version('mincss/__init__.py'),
+ description='clears the junk out of your CSS',
+ long_description=read('README.md') + '\n\n' +
+ '\n'.join(read('docs', 'changelog.rst')
+ .splitlines()[1:]),
+ author='Peter Bengtsson',
+ author_email='mail@peterbe.com',
+ license='BSD',
+ packages=find_packages(),
+ include_package_data=True,
+ zip_safe=False,
+ classifiers=[
+ 'Development Status :: 4 - Beta',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: BSD License',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 2',
+ 'Programming Language :: Python :: 2.6',
+ 'Programming Language :: Python :: 2.7',
+ ],
+ install_requires=find_install_requires(),
+ tests_require=['nose'],
+ test_suite='tests',
+ url='http://github.com/peterbe/mincss'
+)
View
@@ -38,7 +38,7 @@ def test_just_one_link(self):
# two.html only has 1 link CSS ref
link = p.links[0]
eq_(link.href, 'two.css')
- eq_(link.url, url.replace('.html', '.css'))
+ #eq_(link.url, url.replace('.html', '.css'))
ok_(len(link.after) < len(link.before))
lines_after = link.after.splitlines()
# compare line by line
@@ -61,7 +61,7 @@ def test_one_link_two_different_pages(self):
# two.html only has 1 link CSS ref
link = p.links[0]
eq_(link.href, 'two.css')
- eq_(link.url, url1.replace('.html', '.css'))
+ #eq_(link.url, url1.replace('.html', '.css'))
ok_(len(link.after) < len(link.before))
lines_after = link.after.splitlines()
# compare line by line

0 comments on commit e2d9bbb

Please sign in to comment.