# wok/page.py

# System
import os
import sys
from collections import namedtuple
from datetime import datetime, date, time
import logging
import copy

# Libraries
import jinja2
import yaml
import re
from slugify import slugify

# Wok
from wok import util
from wok import renderers
from wok.jinja import GlobFileLoader, AmbiguousTemplate

class Page(object):
    """
    A single page on the website in all its form (raw, rendered, templated),
    as well as its associated metadata.
    """

    tmpl_env = None

    @classmethod
    def create_tmpl_env(cls, options):
        """
        Build the jinja2 environment and store it on the class as `tmpl_env`.

        The environment is shared by every page. It is configured from
        `options`:

        `template_dir` - Passed to the loader as its search path (default
            `'templates'`).
        `ignore_files` - Passed to the loader as `ignores` (default `[]`).
        `jinja2_extensions` - Passed to jinja2 as `extensions` (default `[]`).
        """
        template_dir = options.get('template_dir', 'templates')
        ignored = options.get('ignore_files', [])
        loader = GlobFileLoader(searchpath=template_dir, ignores=ignored)

        extensions = options.get('jinja2_extensions', [])
        cls.tmpl_env = jinja2.Environment(loader=loader, extensions=extensions)

    def __init__(self, options, engine):
        """
        Create an empty page.

        `options` is the site configuration mapping. `engine` is the object
        whose `run_hook` method is called at each stage of processing.

        Pages are normally built through `from_file` or `from_meta`, which
        fill in the attributes below. They are all initialised here so that
        code reading them (for example the log messages in `build_meta`, which
        mention `path`) never hits an AttributeError on a page that was not
        loaded from disk.
        """
        self.options = options
        self.engine = engine
        self.meta = {}

        # Where the page came from; both stay None for generated pages.
        self.filename = None
        self.path = None

        # Raw text and preview text, set by from_file.
        self.original = None
        self.original_preview = ''

        # Set by from_file / from_meta.
        self.renderer = None

        # Set by build_meta.
        self.template = None
        self.url_pattern = None

        # Set by render, consumed by write.
        self.rendered = None

    @classmethod
    def from_meta(cls, meta, options, engine, renderer=renderers.Plain):
        """
        Build a page object from a meta dictionary.

        `meta` is used as the page's metadata (not copied) and is completed by
        `build_meta`. `options` and `engine` are passed to the constructor and
        `renderer` is stored on the page.

        Note that you still need to call `render` and `write` to do anything
        interesting.

        Raises whatever `build_meta` raises, e.g. `BadMetaException` when the
        metadata has neither a title nor a slug.
        """
        page = cls(options, engine)
        page.meta = meta
        page.renderer = renderer
        # Generated pages have no source file; build_meta's log messages
        # still read this attribute.
        page.path = None

        # This is only a debug aid, so it must not fail on metadata that
        # build_meta would happily accept (no metadata at all, or a
        # pagination section that has no `cur_page` yet).
        pagination = (meta or {}).get('pagination')
        if isinstance(pagination, dict) and 'cur_page' in pagination:
            logging.debug('from_meta: current page %s', pagination['cur_page'])

        # Make a template environment. Hopefully no one expects this to ever
        # change after it is instantiated.
        if cls.tmpl_env is None:
            cls.create_tmpl_env(page.options)

        page.build_meta()
        return page

    @classmethod
    def from_file(cls, path, options, engine, renderer=renderers.Plain):
        """
        Load a file from disk, and parse the metadata from it.

        The file is read as UTF-8 and split on lines consisting of `---`:

        * one section: the whole file is content, with no metadata;
        * two sections: YAML header, then content. The preview text is the
          header's `preview` value, if any;
        * three or more sections: YAML header, then preview, then the rest.
          The content is everything after the header (preview included).

        After `build_meta`, the content and preview are passed through
        `renderer.render` and stored in `meta['content']` and
        `meta['preview']`, with the `page.render.pre` / `page.render.post`
        hooks run around that step.

        Note that you still need to call `render` and `write` to do anything
        interesting.
        """
        page = cls(options, engine)
        page.original = None
        page.renderer = renderer
        page.path = path
        page.filename = os.path.basename(path)

        logging.info('Loading {0}'.format(page.filename))

        if cls.tmpl_env is None:
            cls.create_tmpl_env(page.options)

        # Keep the file open only for the read itself.
        with open(path, 'rU') as f:
            page.original = f.read().decode('utf-8')

        splits = page.original.split('\n---\n')

        if len(splits) > 3:
            logging.warning('Found more --- delimited sections in {0} '
                            'than expected. Squashing the extra together.'
                            .format(page.path))

        if len(splits) == 1:
            # Handle the case where no metadata was provided.
            page.original = splits[0]
            page.meta = {}
            page.original_preview = ''
        else:
            # NOTE(security): yaml.load can construct arbitrary Python
            # objects. That is acceptable only while page sources are trusted;
            # consider yaml.safe_load if they ever are not.
            header_meta = yaml.load(splits[0])

            # An empty header parses to None. That is simply "no metadata".
            if header_meta is None:
                header_meta = {}
            elif not isinstance(header_meta, dict):
                logging.error('The header of {0} is not a YAML mapping. '
                              'Ignoring it.'.format(page.path))
                header_meta = {}

            page.meta = {}
            page.meta.update(header_meta)

            if len(splits) == 2:
                page.original = splits[1]
                page.original_preview = page.meta.get('preview', '')
            else:
                page.original = '\n'.join(splits[1:])
                page.original_preview = splits[1]
                logging.debug('Got preview')

        page.build_meta()

        page.engine.run_hook('page.render.pre', page)
        # page.meta is passed along because it might contain renderer options.
        page.meta['content'] = page.renderer.render(page.original, page.meta)
        page.meta['preview'] = page.renderer.render(page.original_preview,
                                                    page.meta)
        page.engine.run_hook('page.render.post', page)

        return page

    def build_meta(self):
        """
        Ensures the guarantees about metadata for documents are valid.

        `page.source_path` - Will be the source file name, or None.
        `page.title` - Will exist (from the file name or slug if not given).
        `page.slug` - Will exist (from the file name or title if not given).
        `page.author` - Will be an Author (the first of `authors`, or empty).
        `page.authors` - Will be a list.
        `page.category` - Will be a list.
        `page.published` - Will exist (default True).
        `page.make_file` - Will exist (default True).
        `page.datetime`, `page.date`, `page.time` - Filled in by
            `util.date_and_times`.
        `page.tags` - Will be a list.
        `page.pagination` - Will have `cur_page` and `num_pages`.
        `page.ext` - Will be the extension of the template file, without dot.
        `page.url` - Will be the url of the page.
        `page.path` - Will be the url before `url_subdir` is prepended; used
            when writing the page.
        `page.subpages` - Will be an empty list, to be filled in later.

        The `page.meta.pre` and `page.meta.post` hooks are run around all of
        this.

        Raises `BadMetaException` if the page has no file name, no title and
        no slug. Exits the process with a non-zero status if the template for
        the page's `type` is missing or ambiguous.
        """

        self.engine.run_hook('page.meta.pre', self)

        if not self.meta:
            self.meta = {}

        # On Python 2, PyYAML returns `unicode` (not `str`) for scalars that
        # contain non-ASCII characters, so text checks must accept both.
        try:
            string_types = (str, unicode)
        except NameError:
            string_types = (str,)
        text_type = string_types[-1]

        # Name of this page for log messages. Pages that do not come from a
        # file have no path; the slug is used for those once it is known.
        source_name = getattr(self, 'path', None) or self.filename

        # File name without its last extension. If that leaves nothing
        # (e.g. no dot at all) the whole file name is used.
        filename_no_ext = None
        if self.filename:
            filename_no_ext = '.'.join(self.filename.split('.')[:-1])
            if filename_no_ext == '':
                filename_no_ext = self.filename

        # source_path
        if 'source_path' not in self.meta:
            self.meta['source_path'] = self.filename or None

        # title
        if 'title' not in self.meta:
            if self.filename:
                self.meta['title'] = filename_no_ext
                logging.warning("You didn't specify a title in {0}. Using the "
                                "file name as a title.".format(source_name))
            elif 'slug' in self.meta:
                self.meta['title'] = self.meta['slug']
                logging.warning("You didn't specify a title in {0}, which was "
                        "not generated from a file. Using the slug as a title."
                        .format(self.meta['slug']))
            else:
                logging.error("A page was generated that is not from a file, "
                        "has no title, and no slug. I don't know what to do. "
                        "Not using this page.")
                logging.info("Bad Meta's keys: {0}".format(self.meta.keys()))
                logging.debug("Bad Meta: {0}".format(self.meta))
                raise BadMetaException()

        # slug
        if 'slug' not in self.meta:
            if self.filename and self.options['slug_from_filename']:
                self.meta['slug'] = slugify(filename_no_ext)
                logging.info("You didn't specify a slug, generating it from the "
                             "filename.")
            else:
                self.meta['slug'] = slugify(text_type(self.meta['title']))
                logging.info("You didn't specify a slug, and no filename "
                             "exists. Generating the slug from the title.")

        elif self.meta['slug'] != slugify(self.meta['slug']):
            logging.warning('Your slug should probably be all lower case, and '
                            'match "[a-z0-9-]*"')

        source_name = source_name or self.meta['slug']

        # authors and author
        authors = self.meta.get('authors', self.meta.get('author', None))
        if isinstance(authors, list):
            self.meta['authors'] = [Author.parse(a) for a in authors]
        elif isinstance(authors, string_types):
            self.meta['authors'] = [Author.parse(a) for a in authors.split(',')]
            if len(self.meta['authors']) > 1:
                logging.warning('Deprecation Warning: Use YAML lists instead of '
                        'CSV for multiple authors. i.e. ["John Doe", "Jane '
                        'Smith"] instead of "John Doe, Jane Smith". In '
                        '{0}.'.format(source_name))

        elif authors is None:
            self.meta['authors'] = self.options.get('authors', [])
        else:
            # wait, what? Authors is of wrong type.
            self.meta['authors'] = []
            logging.error('Authors in {0} is an unknown type. Valid types '
                          'are string or list. Instead it is a {1}'
                          .format(self.meta['slug'], type(authors)))

        if self.meta['authors']:
            self.meta['author'] = self.meta['authors'][0]
        else:
            self.meta['author'] = Author()

        # category
        category = self.meta.get('category')
        if category is None:
            # Missing, or present but left empty in the YAML.
            self.meta['category'] = []
        elif isinstance(category, string_types):
            self.meta['category'] = category.split('/')
        elif not isinstance(category, list):
            # category is of wrong type.
            logging.error('Category in {0} is an unknown type. Valid '
                          'types are string or list. Instead it is a {1}'
                          .format(self.meta['slug'], type(category)))
            self.meta['category'] = []

        # published
        self.meta.setdefault('published', True)

        # make_file
        self.meta.setdefault('make_file', True)

        # datetime, date, time
        util.date_and_times(self.meta)

        # tags
        tags = self.meta.get('tags')
        if tags is None:
            # Missing, or present but left empty in the YAML.
            self.meta['tags'] = []
        elif isinstance(tags, string_types):
            self.meta['tags'] = [t.strip() for t in tags.split(',')]
            if len(self.meta['tags']) > 1:
                logging.warning('Deprecation Warning: Use YAML lists instead '
                        'of CSV for multiple tags. i.e. tags: [guide, '
                        'howto] instead of tags: guide, howto. In {0}.'
                        .format(source_name))
        elif not isinstance(tags, list):
            # tags is of wrong type; keep the "will be a list" guarantee.
            logging.error('Tags in {0} is an unknown type. Valid types '
                          'are string or list. Instead it is a {1}'
                          .format(self.meta['slug'], type(tags)))
            self.meta['tags'] = []

        logging.debug('Tags for {0}: {1}'.
                format(self.meta['slug'], self.meta['tags']))

        # pagination
        if self.meta.get('pagination') is None:
            self.meta['pagination'] = {}
        self.meta['pagination'].setdefault('cur_page', 1)
        self.meta['pagination'].setdefault('num_pages', 1)

        # template
        template_type = str(self.meta.get('type', 'default'))
        try:
            self.template = self.tmpl_env.get_template(template_type + '.*')
        except jinja2.loaders.TemplateNotFound:
            logging.error('No template "{0}.*" found in template directory. Aborting.'
                    .format(template_type))
            # Non-zero status: this is a failure, not a clean exit.
            sys.exit(1)
        except AmbiguousTemplate:
            logging.error(('Ambiguous template found. There are two files that '
                          'match "{0}.*". Aborting.').format(template_type))
            sys.exit(1)

        # url
        parts = {
            'slug': self.meta['slug'],
            'category': '/'.join(self.meta['category']),
            'page': self.meta['pagination']['cur_page'],
            'date': self.meta['date'],
            'datetime': self.meta['datetime'],
            'time': self.meta['time'],
        }
        logging.debug('current page: ' + repr(parts['page']))

        # Pull extensions from the template's real file name.
        parts['ext'] = os.path.splitext(self.template.filename)[1]
        if parts['ext']:
            parts['ext'] = parts['ext'][1:] # remove leading dot
        # Deprecated
        parts['type'] = parts['ext']
        self.meta['ext'] = parts['ext']

        # The first page of a set gets no page number in its url.
        if parts['page'] == 1:
            parts['page'] = ''

        if 'url' in self.meta:
            logging.debug('Using page url pattern')
            self.url_pattern = self.meta['url']
        else:
            logging.debug('Using global url pattern')
            self.url_pattern = self.options['url_pattern']

        self.meta['url'] = self.url_pattern.format(**parts)

        logging.info('URL pattern is: {0}'.format(self.url_pattern))
        logging.info('URL parts are: {0}'.format(parts))

        # Get rid of extra slashes
        self.meta['url'] = re.sub(r'//+', '/', self.meta['url'])

        # If we have been asked to, rip out any plain "index.html"s
        if not self.options['url_include_index']:
            self.meta['url'] = re.sub(r'/index\.html$', '/', self.meta['url'])

        # To be used for writing page content
        self.meta['path'] = self.meta['url']
        # If site is going to be in a subdirectory
        if self.options.get('url_subdir'):
            self.meta['url'] = self.options['url_subdir'] + self.meta['url']

        # Some urls should start with /, some should not. startswith is used
        # rather than indexing so that an empty url does not raise.
        has_leading_slash = self.meta['url'].startswith('/')
        if self.options['relative_urls'] and has_leading_slash:
            self.meta['url'] = self.meta['url'][1:]
        if not self.options['relative_urls'] and not has_leading_slash:
            self.meta['url'] = '/' + self.meta['url']

        logging.debug('url is: ' + self.meta['url'])

        # subpages
        self.meta['subpages'] = []

        self.engine.run_hook('page.meta.post', self)

    def render(self, templ_vars=None):
        """
        Renders the page with the template engine.

        `templ_vars` holds the variables handed to the template. Its `page`
        and `pagination` entries are filled from this page's metadata; values
        already present under those keys are updated, not replaced.

        The result is stored in `self.rendered`. Returns whatever `paginate`
        produced when the page has a `pagination.list` setting, and an empty
        list otherwise.
        """
        logging.debug('Rendering ' + self.meta['slug'])
        templ_vars = templ_vars or {}

        # Handle pagination if we needed.
        wants_pagination = ('pagination' in self.meta
                            and 'list' in self.meta['pagination'])
        extra_pages = self.paginate(templ_vars) if wants_pagination else []

        # Don't clobber possible values in the template variables.
        if 'page' not in templ_vars:
            templ_vars['page'] = self.meta
        else:
            logging.debug('Found defaulted page data.')
            templ_vars['page'].update(self.meta)

        # Don't clobber pagination either.
        if 'pagination' not in templ_vars:
            templ_vars['pagination'] = self.meta['pagination']
        else:
            templ_vars['pagination'].update(self.meta['pagination'])

        # ... and actions! (and logging, and hooking)
        self.engine.run_hook('page.template.pre', self, templ_vars)
        logging.debug('templ_vars.keys(): ' + repr(templ_vars.keys()))
        self.rendered = self.template.render(templ_vars)
        logging.debug('extra pages is: ' + repr(extra_pages))
        self.engine.run_hook('page.template.post', self)

        return extra_pages

    def paginate(self, templ_vars):
        """
        Split the list named by `pagination.list` across several pages.

        `pagination.list` is a dotted path starting with `page` (this page's
        metadata) or `site` (`templ_vars['site']`). The list it points to is
        optionally sorted by `pagination.sort_key` / `pagination.sort_reverse`
        and divided with `util.chunk` using `pagination.limit`. If the list
        holds Page objects, their metadata dictionaries are used instead.

        This page keeps the first chunk. A new page is created with
        `from_meta` for every further chunk, and `prev_page` / `next_page`
        links are set between them.

        Always returns a list: the newly created pages, or an empty list when
        there is nothing to do (the page already has `page_items`, the source
        is unknown or empty).
        """
        extra_pages = []
        pagination = self.meta['pagination']
        slug = self.meta['slug']
        logging.debug('called pagination for {0}'.format(slug))

        # Only the first page of a set sets up the rest; a page that already
        # has its items does nothing.
        if 'page_items' in pagination:
            return extra_pages

        logging.debug('doing pagination for {0}'.format(slug))

        source_spec = pagination['list'].split('.')
        logging.debug('pagination source is: ' + repr(source_spec))

        root = source_spec.pop(0)
        if root == 'page':
            source = self.meta
        elif root == 'site':
            source = templ_vars['site']
        else:
            logging.error('Unknown pagination source! Not paginating')
            # Return a list like every other exit, not None.
            return extra_pages

        for k in source_spec:
            source = source[k]

        sort_key = pagination.get('sort_key', None)
        sort_reverse = pagination.get('sort_reverse', False)

        logging.debug('sort_key: {0}, sort_reverse: {1}'.format(
            sort_key, sort_reverse))

        if not source:
            return extra_pages
        if isinstance(source[0], Page):
            source = [p.meta for p in source]

        if sort_key is not None:
            if isinstance(source[0], dict):
                key_func = lambda item: item[sort_key]
            else:
                key_func = lambda item: getattr(item, sort_key)
            # sorted() rather than list.sort(): the source list belongs to
            # the page or site data and must not be reordered in place.
            source = sorted(source, key=key_func, reverse=sort_reverse)

        chunks = list(util.chunk(source, pagination['limit']))
        if not chunks:
            return extra_pages

        # Make a page for each chunk after the first; they are numbered
        # from 2 because this page is page 1.
        for idx, chunk in enumerate(chunks[1:], 2):
            new_meta = copy.deepcopy(self.meta)
            new_meta.update({
                # Hand over the unformatted pattern so the new page builds
                # its own url from its own page number.
                'url': self.url_pattern,
                'pagination': {
                    'page_items': chunk,
                    'num_pages': len(chunks),
                    'cur_page': idx,
                }
            })
            new_page = self.from_meta(new_meta, self.options, self.engine,
                renderer=self.renderer)
            logging.debug('page {0} is {1}'.format(idx, new_page))
            if new_page:
                extra_pages.append(new_page)

        # Set up the next/previous page links
        for idx, page in enumerate(extra_pages):
            if idx == 0:
                page.meta['pagination']['prev_page'] = self.meta
            else:
                page.meta['pagination']['prev_page'] = extra_pages[idx-1].meta

            if idx < len(extra_pages) - 1:
                page.meta['pagination']['next_page'] = extra_pages[idx+1].meta
            else:
                page.meta['pagination']['next_page'] = None

        # Pagination data for this page
        pagination.update({
            'page_items': chunks[0],
            'num_pages': len(chunks),
            'cur_page': 1,
        })
        # Extra pages doesn't include the first page, so if there is at
        # least one, then make a link to the next page.
        if extra_pages:
            pagination['next_page'] = extra_pages[0].meta

        return extra_pages

    def write(self):
        """
        Write the page to a rendered file on disk.

        The target is `meta['path']` (leading slash removed) below the
        `output_dir` option, or below the current directory if that option is
        not set. A target ending in `/` gets `index.<ext>` appended. Missing
        directories are created, and `self.rendered` is written encoded as
        UTF-8.
        """

        # Use what we are passed, or the default given, or the current dir
        base_path = self.options.get('output_dir', '.')
        path = self.meta['path']
        if path and path[0] == '/':
            path = path[1:]
        base_path = os.path.join(base_path, path)
        if base_path.endswith('/'):
            base_path += 'index.' + self.meta['ext']

        out_dir = os.path.dirname(base_path)
        # An empty dirname means "current directory": nothing to create.
        if out_dir and not os.path.isdir(out_dir):
            try:
                os.makedirs(out_dir)
            except OSError as err:
                if os.path.isdir(out_dir):
                    # Something else created it in the meantime; that's ok.
                    logging.debug('makedirs failed for {0}'.format(
                        os.path.basename(base_path)))
                else:
                    # A real problem (e.g. permissions). Report why here; the
                    # open() below then fails as it always did.
                    logging.error('Could not create directory {0}: {1}'
                                  .format(out_dir, err))
        logging.info('writing to {0}'.format(base_path))

        logging.debug('Writing {0} to {1}'.format(self.meta['slug'], base_path))
        # `with` closes the file even if encoding or writing fails.
        with open(base_path, 'w') as f:
            f.write(self.rendered.encode('utf-8'))

    def __repr__(self):
        """
        Return a debugging representation containing the page's slug.

        Plain angle brackets are used (not HTML entities), as in
        `Author.__repr__`. A page whose slug has not been determined yet is
        shown with `None`, so that repr() never raises.
        """
        return "<wok.page.Page '{0}'>".format((self.meta or {}).get('slug'))


class Author(object):
    """
    Smartly manages an author with name and email.

    `raw` is the stripped original text, `name` and `email` are the parsed
    parts (either may be None or empty).
    """

    # "Name", "Name <user@host>" or "<user@host>". The bracketed part is only
    # recognised when it contains an "@".
    parse_author_regex = re.compile(r'^([^<>]*) *(<(.*@.*)>)?$')

    def __init__(self, raw='', name=None, email=None):
        """Store the given parts; `raw` may be None and is stripped."""
        self.raw = (raw or '').strip()
        self.name = name
        self.email = email

    @classmethod
    def parse(cls, raw):
        """
        Build an Author from text such as "Jane Doe <jane@example.com>".

        An Author instance is returned unchanged. Text that does not have the
        expected form (for example angle brackets without an "@" inside) is
        kept whole as the name, with no email.
        """
        if isinstance(raw, cls):
            return raw

        a = cls(raw)
        match = cls.parse_author_regex.match(raw)
        if match is None:
            # Unparseable: keep what was written, don't crash.
            a.name = a.raw
            return a

        a.name, _, a.email = match.groups()
        if a.name:
            a.name = a.name.strip()
        if a.email:
            a.email = a.email.strip()
        return a

    def _as_text(self):
        """Return "name <email>", or just the name, or the raw text."""
        if not self.name:
            return self.raw
        if not self.email:
            return self.name

        # A text (unicode) template, so that non-ASCII names are not pushed
        # through a byte-string format on Python 2.
        return u"{0} <{1}>".format(self.name, self.email)

    def __str__(self):
        return self._as_text()

    def __repr__(self):
        return '<wok.page.Author "{0} <{1}>">'.format(self.name, self.email)

    def __unicode__(self):
        """Like `__str__`, with the angle brackets escaped as HTML entities."""
        return self._as_text().replace('<', '&lt;').replace('>', '&gt;')

class BadMetaException(Exception):
    """Raised when a page's metadata is not enough to build the page."""