Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
60 lines (49 sloc) 2.91 KB
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '13, January 2010'
__description__ = 'PCMag (www.pcmag.com) delivers authoritative, labs-based comparative reviews of computing and Internet products to highly engaged technology buyers.'
'''
http://www.pcmag.com/
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment
class pcMag(BasicNewsRecipe):
__author__ = 'Lorenzo Vigentini'
description = 'PCMag (www.pcmag.com) delivers authoritative, labs-based comparative reviews of computing and Internet products to highly engaged technology buyers.'
cover_url = 'http://www.pcmag.com/images/bg-logo-sharp.2.gif'
title = 'PC Magazine'
publisher = 'Ziff Davis Media'
category = 'PC, computing, product reviews'
language = 'en'
encoding = 'cp1252'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 15
max_articles_per_feed = 25
use_embedded_content = False
recursion = 10
remove_javascript = True
no_stylesheets = True
feeds = [
(u'Tech Commentary from the Editors of PC Magazine', u'http://rssnewsapps.ziffdavis.com/PCMAG_commentary.xml'),
(u'PC Magazine Breaking News', u'http://rssnewsapps.ziffdavis.com/pcmagtips.xml'),
(u'PC Magazine Tips and Solutions', u'http://rssnewsapps.ziffdavis.com/pcmagofficetips.xml'),
(u'PC Magazine Small Business', u'http://blogs.pcmag.com/atwork/index.xml'),
(u'PC Magazine Security Watch', u'http://feeds.ziffdavis.com/ziffdavis/securitywatch?format=xml'),
(u'PC Magazine: the Official John C. Dvorak RSS Feed', u'http://rssnewsapps.ziffdavis.com/PCMAG_dvorak.xml'),
(u'PC Magazine Editor-in-Chief Lance Ulanoff', u'http://rssnewsapps.ziffdavis.com/pcmagulanoff.xml'),
(u'Michael Millers Forward Thinking from PCMag.com', u'http://feeds.ziffdavis.com/ziffdavis/pcmag-miller?format=xml'),
(u'Technology News from Ziff Davis', u'http://rssnewsapps.ziffdavis.com/pcmagbreakingnews.xml')
]
keep_only_tags = [dict(attrs={'class':'content-page'})]
remove_tags = [
dict(attrs={'class':['control-side','comment','highlights_content','btn-holder','subscribe-panel',
'grey-box comments-box']}),
dict(id=['inlineDigg']),
dict(text=lambda text:isinstance(text, Comment)),
dict(name='img', width='1'),
]
preprocess_regexps = [(re.compile(r"<img '[^']+?'"), lambda m : '<img ')]