# jpost.recipe

import re
from calibre.web.feeds.news import BasicNewsRecipe

class JerusalemPost(BasicNewsRecipe):
    """Recipe that pulls Jerusalem Post articles from its RSS feeds.

    Articles are fetched through the site's ``PrintArticle.aspx`` page when
    a numeric id can be found in the article URL (see ``print_version``),
    and ``<form>`` elements are renamed to ``<div>`` before conversion
    (see ``preprocess_html``).
    """

    # --- Recipe metadata -------------------------------------------------
    title = 'Jerusalem Post'
    __author__ = 'Kovid Goyal'
    description = 'News from Israel and the Middle East'
    language = 'en'

    # --- Download / conversion settings ----------------------------------
    use_embedded_content = False
    max_articles_per_feed = 10
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    # (section title, RSS URL) pairs.
    feeds = [
        ('Front Page',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
        ('Israel News',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
        ('Middle East News',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'),
        ('International News',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'),
        ('Editorials',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
    ]

    # Tag specifications handed to the base recipe for removal.
    remove_tags = [
        # Elements whose id attribute is non-empty and contains 'ads.'.
        {'id': lambda value: value and 'ads.' in value},
        # Elements carrying either of these CSS classes.
        {'attrs': {'class': ['printinfo', 'tt1']}},
        # Elements whose onclick attribute is exactly 'DoPrint()'.
        {'onclick': 'DoPrint()'},
        # Every <input> element.
        {'name': 'input'},
    ]

    def preprocess_html(self, soup):
        """Rename every ``<form>`` element in *soup* to ``<div>``.

        The tags are modified in place and the same *soup* object is
        returned.
        """
        for form in soup.findAll('form'):
            form.name = 'div'
        return soup

    def print_version(self, url):
        """Return the print-page URL for *url*, or *url* itself.

        The first ``ID=<digits>`` / ``id=<digits>`` occurrence in *url*
        supplies the id for the ``PrintArticle.aspx`` address.  When no
        such occurrence exists, *url* is returned unchanged.
        """
        # NOTE: the pattern is unanchored, so it also matches an 'id='
        # that ends a longer parameter name (e.g. 'cid=123').
        found = re.search(r'(ID|id)=(\d+)', url)
        if found is None:
            return url
        article_id = found.group(2)
        return 'http://www.jpost.com/LandedPages/PrintArticle.aspx?id=%s' % article_id