This repository has been archived by the owner on May 21, 2021. It is now read-only.
/
wysokie_obcasy.recipe
57 lines (47 loc) · 1.82 KB
/
wysokie_obcasy.recipe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class WysokieObcasyRecipe(BasicNewsRecipe):
    """Calibre news recipe for the Polish magazine "Wysokie Obcasy".

    Articles are taken from the site's RSS feed, rewritten to their
    print-view URLs by :meth:`print_version`, and the cover image is scraped
    from the magazine index page by :meth:`get_cover_url`.
    """

    # --- Recipe metadata -------------------------------------------------
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1
    title = u'Wysokie Obcasy'
    publisher = 'Agora SA'
    description = u'Serwis sobotniego dodatku do Gazety Wyborczej'
    category = 'magazine'
    publication_type = 'magazine'
    # Placeholder; replaced by get_cover_url() when the cover can be found.
    cover_url = ''

    # --- Download / clean-up settings -------------------------------------
    # These are BasicNewsRecipe options; see the calibre recipe API
    # documentation for their exact semantics.
    remove_empty_feeds = True
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0
    remove_javascript = True
    simultaneous_downloads = 5

    # Tag filters: the <div id="article"> element is listed to keep, while
    # every <img> and every <p class="info"> is listed for removal.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
    ]
    remove_tags = [
        dict(name='img'),
        dict(name='p', attrs={'class': 'info'}),
    ]

    extra_css = (
        '\n'
        'body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}\n'
        'h1{text-align: left;}\n'
    )

    feeds = [
        ('Wszystkie Artykuly', 'feed://www.wysokieobcasy.pl/pub/rss/wysokieobcasy.xml'),
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at ``url``.

        The second and third comma-separated segments of ``url`` are
        appended to a fixed print-view prefix, followed by ``.html``.

        A URL with fewer than three comma-separated segments does not match
        the expected layout; it is returned unchanged instead of raising
        ``IndexError``.
        """
        segments = url.split(',')
        if len(segments) < 3:
            return url
        base_url = 'http://www.wysokieobcasy.pl/wysokie-obcasy'
        sub_path = '/2029020,'
        return base_url + sub_path + segments[1] + ',' + segments[2] + '.html'

    def get_cover_url(self):
        """Return the cover image URL scraped from the magazine index page.

        Looks up the first element with class ``holder_cr`` on the index
        page and reads the ``src`` of the first ``img`` inside it. On
        success the value is also stored in ``self.cover_url``.

        If the expected markup is missing, the current ``self.cover_url``
        is returned unchanged rather than raising.
        """
        soup = self.index_to_soup('http://www.wysokieobcasy.pl/wysokie-obcasy/0,0.html')
        holder = soup.find(attrs={'class': 'holder_cr'})
        image = holder.find('img') if holder is not None else None
        if image is not None and image.get('src'):
            self.cover_url = image['src']
        return self.cover_url