-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Tom
committed
Dec 23, 2011
1 parent
3403377
commit fca9bfb
Showing
33 changed files
with
1,933 additions
and
649 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
class Biolog_pl(BasicNewsRecipe):
    """Recipe that builds an e-book from the RSS feeds of Biolog.pl."""

    # --- Publication metadata -------------------------------------------
    title = u'Biolog.pl'
    __author__ = 'fenuks'
    description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
    category = 'biology'
    language = 'pl'
    cover_url = 'http://www.biolog.pl/naukowy,portal,biolog.png'

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    # Everything before the element with id "main" and everything after the
    # <a name="komentarze"> anchor is cut away; the "Komentarze" image is
    # removed as well.
    no_stylesheets = True
    remove_tags_before = {'id': 'main'}
    remove_tags_after = {'name': 'a', 'attrs': {'name': 'komentarze'}}
    remove_tags = [{'name': 'img', 'attrs': {'alt': 'Komentarze'}}]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'Wszystkie', u'http://www.biolog.pl/backend.php'),
        (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'),
        (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'),
        (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'),
        (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'),
        (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'),
        (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php'),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from calibre.web.feeds.news import BasicNewsRecipe | ||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    """Recipe for the Birmingham Post (UK), built from its RSS feeds."""

    # Publication metadata.
    title = u'Birmingham post'
    description = 'News for Birmingham UK'
    __author__ = 'Dave Asbury'
    language = 'en_GB'
    timefmt = ''
    cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
    masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'

    # Download limits.
    oldest_article = 1
    max_articles_per_feed = 20
    remove_empty_feeds = True

    # Clean-up: auto_cleanup is enabled, so no manual tag selectors are
    # configured and both lists below are left empty.
    remove_javascript = True
    auto_cleanup = True
    keep_only_tags = []
    remove_tags = []

    # Feeds: (section title, RSS URL).
    feeds = [
        (u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
        (u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
        (u'Sports', u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
        (u'Bloggs & Comments', u'http://www.birminghampost.net/comment/rss.xml'),
    ]

    # Stylesheet applied to the downloaded articles.
    # Fixes relative to the previous version:
    #   * removed a stray apostrophe after the body rule, which opened an
    #     unterminated CSS string and invalidated the following h1 rule;
    #   * the `font` shorthand now lists size before family, as CSS requires.
    extra_css = '''
        body {font: medium sans-serif;}
        h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
        h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
        span{ font-size:9.5px; font-weight:bold;font-style:italic}
        p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
    '''
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
class Computerworld_pl(BasicNewsRecipe):
    """Recipe for Computerworld.pl, built from the IDG news RSS feed."""

    # Publication metadata.
    title = u'Computerworld.pl'
    __author__ = 'fenuks'
    description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
    category = 'IT'
    language = 'pl'

    # Download limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # Clean-up: keep only <div id="s">, cut everything after the "rMobi"
    # div, and drop the navigation / "rMobi" divs and "ramka_slx" tables.
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'id': 's'})]
    remove_tags_after = dict(name='div', attrs={'class': 'rMobi'})
    remove_tags = [
        dict(name='div', attrs={'class': ['nnav', 'rMobi']}),
        dict(name='table', attrs={'class': 'ramka_slx'}),
    ]

    # Feeds: (section title, RSS URL).
    feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]

    def get_cover_url(self):
        """Return the cover image URL scraped from the site's front page.

        The front page is searched for an <img class="prawo"> element and
        its ``src`` is stored in ``self.cover_url``.  If no such image (or
        no ``src`` attribute) is present, ``self.cover_url`` is left
        untouched instead of raising, and whatever value it already holds
        is returned.
        """
        soup = self.index_to_soup('http://www.computerworld.pl/')
        cover = soup.find(name='img', attrs={'class': 'prawo'})
        # soup.find() returns None when the page layout changes; guard so a
        # missing cover does not abort the whole download.
        if cover is not None and cover.get('src'):
            self.cover_url = cover['src']
        return getattr(self, 'cover_url', None)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
__license__ = 'GPL v3' | ||
__author__ = 'faber1971' | ||
description = 'Italian soccer news website - v1.00 (17, December 2011)' | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
|
||
class AdvancedUserRecipe1324114272(BasicNewsRecipe):
    """Recipe for Datasport, built from its football (calcio) RSS feed."""

    # Publication metadata.  The description is declared here as a class
    # attribute so that it belongs to the recipe itself rather than only
    # existing as a module-level variable.
    title = u'Datasport'
    __author__ = 'faber1971'
    description = 'Italian soccer news website'
    language = 'it'

    # Download limits.
    oldest_article = 1
    max_articles_per_feed = 100

    # Article extraction is delegated to auto_cleanup.
    auto_cleanup = True

    # Feeds: (section title, RSS URL).
    feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
import re | ||
class Dziennik_pl(BasicNewsRecipe):
    """Recipe for Dziennik.pl that also stitches multi-page articles together."""

    # Publication metadata.
    title = u'Dziennik.pl'
    __author__ = 'fenuks'
    description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
    category = 'newspaper'
    language = 'pl'
    cover_url = 'http://6.s.dziennik.pl/images/og_dziennik.jpg'

    # Download limits.
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True

    # Clean-up rules.
    no_stylesheets = True
    remove_javascript = True
    # Strip the literal "Komentarze:" label from the raw HTML.
    preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
    keep_only_tags = [dict(id='article')]
    remove_tags = [
        dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}),
        dict(name='a', attrs={'class': 'komentarz'}),
    ]

    # Feeds: (section title, RSS URL).
    feeds = [
        (u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
        (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
        (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
        (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
        (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
        (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
        (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
        (u'Muzyka', u'http://rss.dziennik.pl/Dziennik-Muzyka'),
        (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
        (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
        (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
        (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci'),
    ]

    def append_page(self, soup, appendtag):
        """Append the bodies of all follow-up pages of an article to *appendtag*.

        Follows the chain of <a class="page_next"> links starting from
        *soup*, downloads each linked page, cleans its
        <div class="article_body"> with the ``remove_tags`` rules and
        inserts it at the end of *appendtag*.  Paginator blocks are removed
        from *appendtag*.  Does nothing when the article has a single page.

        Lookups that find nothing are skipped rather than dereferenced, so a
        page lacking a paginator or an article body no longer raises
        AttributeError.
        """
        next_link = soup.find('a', attrs={'class': 'page_next'})
        if not next_link:
            return

        paginator = appendtag.find('div', attrs={'class': 'article_paginator'})
        if paginator is not None:
            paginator.extract()

        while next_link:
            page = self.index_to_soup(next_link['href'])
            next_link = page.find('a', attrs={'class': 'page_next'})
            if not next_link:
                # Last page reached: drop the "art_src" boxes collected so far
                # before the final body is appended.
                for source_box in appendtag.findAll('div', attrs={'class': 'art_src'}):
                    source_box.extract()

            pagetext = page.find(name='div', attrs={'class': 'article_body'})
            if pagetext is None:
                # Nothing to append from this page; keep following the chain.
                continue

            for spec in self.remove_tags:
                unwanted = pagetext.findAll(name=spec.get('name'), attrs=spec.get('attrs', {}))
                for element in unwanted:
                    element.extract()
            appendtag.insert(len(appendtag.contents), pagetext)

        # Appended pages may have brought their own paginator along.
        paginator = appendtag.find('div', attrs={'class': 'article_paginator'})
        if paginator is not None:
            paginator.extract()

    def preprocess_html(self, soup):
        """Merge follow-up pages into the article body and return *soup*."""
        self.append_page(soup, soup.body)
        return soup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
__license__ = 'GPL v3' | ||
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller' | ||
''' | ||
Fetch echo-online.de | ||
''' | ||
|
||
from calibre.web.feeds.recipes import BasicNewsRecipe | ||
class Echo_Online(BasicNewsRecipe):
    """Recipe for echo-online.de that downloads the print view of each article."""

    # --- Publication metadata -------------------------------------------
    title = u' Echo Online'
    description = '-Echo Online-'
    publisher = 'Echo Online GmbH'
    category = 'News, Germany'
    __author__ = 'Armin Geller'  # 2011-12-17
    language = 'de'
    lang = 'de-DE'
    encoding = 'iso-8859-1'
    timefmt = ' [%a, %d %b %Y]'
    cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif'  # 2011-12-16 AGe

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 2

    # --- Page clean-up --------------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True
    # XPath of the large picture block that auto_cleanup must not discard.
    auto_cleanup_keep = '//div[@class="bild_gross w270"]'
    remove_tags = [{'name': 'div', 'attrs': {'class': ["header", "name"]}}]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
        (u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
        (u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
        (u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
        (u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
        (u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
        (u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
        (u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
        (u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
        (u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
        (u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at *url*.

        The feed URL is opened first so that the address reported by the
        response (``geturl()``) is used, then the print-format query string
        is appended to it.
        """
        resolved = self.browser.open_novisit(url).geturl()
        return resolved + '?_FRAME=33&_FORMAT=PRINT'
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
################################################################################ | ||
#Description: http://es.hu/ RSS channel | ||
#Author: Bigpapa (bigpapabig@hotmail.com) | ||
#Date: 2010.12.01. - V1.0 | ||
################################################################################ | ||
|
||
from calibre.web.feeds.recipes import BasicNewsRecipe | ||
|
||
class elet_es_irodalom(BasicNewsRecipe):
    """Recipe for Elet es Irodalom (es.hu), fed by feed43.com generated feeds."""

    # --- Publication metadata -------------------------------------------
    title = u'Elet es Irodalom'
    __author__ = 'Bigpapa'
    category = 'Cikkek'
    language = 'hu'
    publication_type = 'newsportal'
    encoding = 'iso-8859-2'

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    # Maximum number of articles per feed stored in the generated e-book.
    max_articles_per_feed = 20
    use_embedded_content = False

    # --- Styling and clean-up -------------------------------------------
    no_stylesheets = True
    extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '

    # Only the author, title and document body divs are kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['doc_author', 'doc_title', 'doc']}},
    ]

    # Links targeting "_TOP" and the right-floated boxes are removed.
    remove_tags = [
        {'name': 'a', 'attrs': {'target': ['_TOP']}},
        {'name': 'div', 'attrs': {'style': [
            'float: right; margin-left: 5px; margin-bottom: 5px;',
            'float: right; margin-left: 5px; margin-bottom: 5px;',
        ]}},
    ]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
        (u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
        (u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
        (u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
        (u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
        (u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
        (u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
        (u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
        (u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml'),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
class eMuzyka(BasicNewsRecipe):
    """Recipe for eMuzyka.pl covering its news and review feeds."""

    # --- Publication metadata -------------------------------------------
    title = u'eMuzyka'
    __author__ = 'fenuks'
    description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
    category = 'music'
    language = 'pl'
    cover_url = 'http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Page clean-up --------------------------------------------------
    # Keep the news container, <h3> headings and review text; drop the
    # span with id "date".
    no_stylesheets = True
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'news_container'}},
        {'name': 'h3'},
        {'name': 'div', 'attrs': {'class': 'review_text'}},
    ]
    remove_tags = [{'name': 'span', 'attrs': {'id': 'date'}}]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'),
        (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2'),
    ]
Oops, something went wrong.