This repository has been archived by the owner on May 21, 2021. It is now read-only.
/
tv_foxlife.recipe
102 lines (89 loc) · 4.04 KB
/
tv_foxlife.recipe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import re
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
class programtvRecipe(BasicNewsRecipe):
    """Calibre recipe that collects the FOX Life TV schedule from tv.gazeta.pl.

    One feed is produced per day, starting with today and covering
    ``DAYS_AHEAD`` consecutive days. Every programme link found on a day's
    listing page becomes one article of that day's feed.
    """

    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    language = 'pl'
    version = 1
    title = u'Program FOX Life'
    category = u'News'
    description = ''
    cover_url = ''
    remove_empty_feeds = True
    no_stylesheets = True
    oldest_article = 1
    max_articles_per_feed = 100000
    recursions = 0
    remove_javascript = True
    simultaneous_downloads = 2

    # Article pages: keep only the programme module, then strip lists and the
    # channel bar, and cut everything after the element with class "txt".
    keep_only_tags = [dict(name='div', attrs={'class': 'mod mod_program'})]
    remove_tags = [dict(name='ul'), dict(attrs={'class': 'channel_bar'})]
    remove_tags_after = [dict(attrs={'class': 'txt'})]

    # Channel identifier substituted into the listing URL.
    CHANNEL_ID = '382,Fox_Life'
    # Number of consecutive days (today included) to fetch listings for.
    DAYS_AHEAD = 7
    # Listing page template; filled with (date, channel id).
    LISTING_URL = 'http://tv.gazeta.pl/program_tv/0,110298,8700474,,,%s,3,%s.html'
    # Base used to make site-relative programme links absolute.
    SITE_ROOT = 'http://tv.gazeta.pl'
    # Weekday names indexed by date.weekday() (0 = Monday).
    WEEKDAYS = ['Poniedzialek', 'Wtorek', 'Sroda', 'Czwartek', 'Piatek',
                'Sobota', 'Niedziela']

    def parse_index(self):
        """Build the list of feeds, one per day of the schedule.

        Returns:
            A list of ``(feed_title, articles)`` tuples, where ``feed_title``
            is "<weekday name> <ISO date>" and ``articles`` is the list
            returned by :meth:`nz_parse_section`. Days for which no articles
            were found are left out.
        """
        today = datetime.date.today()
        self.log('\t\tDate:', today)

        feeds = []
        for offset in range(self.DAYS_AHEAD):
            day = today + datetime.timedelta(days=offset)
            feed_title = '%s %s' % (self.WEEKDAYS[day.weekday()], day)
            listing_url = self.LISTING_URL % (day, self.CHANNEL_ID)
            articles = self.nz_parse_section(listing_url)
            if articles:
                feeds.append((feed_title, articles))
        return feeds

    def nz_parse_section(self, url):
        """Extract the programme entries from one day's listing page.

        Args:
            url: Address of the listing page to download and parse.

        Returns:
            A list of article dicts with the keys ``title`` ("<time> - <name>"),
            ``url``, ``description`` and ``date`` (the last two always empty).
            An empty list is returned when the page has no element with
            class "body".
        """
        soup = self.index_to_soup(url)
        container = soup.find(attrs={'class': 'body'})
        if container is None:
            # Without this guard a single unexpected page would raise
            # AttributeError and abort the whole download.
            self.log('\t\tNo schedule container found at', url)
            return []

        current_articles = []
        for listing in container.findAllNext('ul'):
            self.log('\t\t\tTIME:', listing)
            for row in listing.findAll(attrs={'class': ['odd', 'even', 'first odd']}):
                link = row.find('a', href=True)
                time_tag = row.find(attrs={'class': 'time'})
                if link is None:
                    continue
                air_time = self.tag_to_string(time_tag)
                self.log('\t\t\tTIME:', time_tag)
                title = air_time + ' - ' + self.tag_to_string(link)
                article_url = link.get('href', False)
                if not article_url:
                    continue
                if article_url.startswith('/'):
                    article_url = self.SITE_ROOT + article_url
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', article_url)
                current_articles.append({
                    'title': title,
                    'url': article_url,
                    'description': '',
                    'date': '',
                })
        return current_articles