-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler.py
104 lines (84 loc) · 3.15 KB
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
from telegram.ext import Updater, CommandHandler
import logging
import config
import dateparser
from datetime import datetime, time, timedelta
# Configure logging once at import time: every record carries a timestamp,
# the emitting logger's name, the level and the message; INFO and above pass.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Module-level logger shared by the functions in this file.
logger = logging.getLogger(__name__)
def error(bot, update, error):
    """Write a WARNING record naming the update and the error it caused.

    Args:
        bot: Unused; accepted because the callback receives it.
        update: The update being processed when the error occurred.
        error: The error that was raised.
    """
    logger.log(logging.WARNING, 'Update "%s" caused error "%s"', update, error)
def get_html(url, timeout=10):
    """Download a page with an HTTP GET and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely on a stalled
            connection, which would freeze the whole bot.

    Returns:
        The response body as decoded by ``requests`` (``Response.text``).
        The HTTP status code is not checked.

    Raises:
        requests.exceptions.RequestException: On connection failures or
            when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def get_all_links(html):
    """Collect the match-page URLs found on a schedule page.

    Args:
        html: Markup of the schedule page.

    Returns:
        A list of URLs, one for each ``div`` with class ``matche__score``
        that contains a link with a non-empty ``href``, in document order.
        Each URL is the first 25 characters of ``config.url`` followed by
        the link's ``href``.
    """
    soup = BeautifulSoup(html, 'lxml')
    # NOTE(review): the first 25 characters of config.url are presumably the
    # site origin (scheme + host) — confirm against the config module.
    base = config.url[:25]

    urls = []
    for score_div in soup.find_all('div', class_='matche__score'):
        anchor = score_div.find('a')
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # find() yields None when the div has no <a>, and get() yields
            # None when the <a> has no href; skip the entry instead of
            # crashing on None.
            continue
        urls.append(base + href)
    return urls
def get_match_info(html):
    """Extract tournament, start time and team names from a match page.

    Args:
        html: Markup of a single match page.

    Returns:
        ``[tournament, 'HH:MM', team1, team2]`` when the match starts less
        than 24 hours from now. ``None`` when it starts 24 hours or more
        from now, or when the start time on the page cannot be parsed.
    """
    soup = BeautifulSoup(html, 'lxml')
    team_class = 'duel__team duel__team--'

    # split()/join collapses runs of whitespace and newlines in scraped text.
    tournament = soup.find('div', class_='duel__wrapper container').find('a').contents[0]
    tournament = ' '.join(tournament.split())

    raw_time = ' '.join(soup.find('time').contents[0].split())
    match_time = dateparser.parse(raw_time)
    if match_time is None:
        # dateparser.parse returns None for text it cannot interpret; the
        # comparison below would then raise TypeError. Report and skip.
        logging.getLogger(__name__).warning(
            'Could not parse match time "%s"', raw_time)
        return None

    # NOTE(review): assumes the parsed value is a naive datetime comparable
    # with datetime.now() — confirm for pages that include a timezone.
    if datetime.now() + timedelta(hours=24) <= match_time:
        return None

    # The class strings, trailing space included, are passed to find()
    # exactly as before.
    team1 = soup.find('div', class_=team_class + 'left ').find('h2').contents[0]
    team2 = soup.find('div', class_=team_class + 'right ').find('h2').contents[0]

    return [tournament, match_time.strftime('%H:%M'), team1, team2]
def crawler():
    """Fetch the schedule page and gather details for every listed match.

    Returns:
        A list with one entry per match page for which ``get_match_info``
        returned something other than ``None``, in the order the links
        were found.
    """
    schedule_html = get_html(config.url)

    collected = []
    for link in get_all_links(schedule_html):
        info = get_match_info(get_html(link))
        # get_match_info returns None for matches it decides to leave out.
        if info is not None:
            collected.append(info)
    return collected
def start(bot=None, update=None):
    """Handle the ``/start`` command; currently a deliberate no-op.

    This function is registered as a ``CommandHandler`` callback, and the
    other callbacks in this file are declared with ``(bot, update)``. The
    parameters are accepted here so that invoking the handler does not raise
    ``TypeError``; both default to ``None`` so a bare ``start()`` call still
    works.

    Args:
        bot: Unused.
        update: Unused.
    """
def post(bot, update):
    """Send the schedule of upcoming matches to the configured chat.

    Calls ``crawler()`` for the matches, groups them by tournament (first
    element of each entry) and sends one Markdown message to
    ``config.chat_id``. When there are no matches, a short plain-text
    notice is sent instead.

    Args:
        bot: Object whose ``send_message`` is used to deliver the text.
        update: Unused.
    """
    upcoming = crawler()
    if not upcoming:
        bot.send_message(chat_id=config.chat_id, text='В следующие сутки матчей не будет')
        return

    # Group the remaining fields (time, team1, team2) under their tournament,
    # keeping tournaments in the order they were first seen.
    by_tournament = {}
    for entry in upcoming:
        by_tournament.setdefault(entry[0], []).append(entry[1:])

    # NOTE(review): names are inserted into the Markdown text unescaped —
    # confirm that special characters cannot occur in them.
    parts = ['Расписание матчей на ближайшие 24 часа: \n\n']
    for tournament, games in by_tournament.items():
        lines = [
            u"\u2022" + game[0] + ' ' + game[1] + ' vs ' + game[2] + '\n'
            for game in games
        ]
        parts.append('*' + tournament + '*:\n' + ''.join(lines) + "\n")

    bot.send_message(chat_id=config.chat_id, text=''.join(parts), parse_mode='Markdown')
def main():
    """Set up the bot, queue one run of ``post`` and start polling.

    Creates an ``Updater`` from ``config.token``, registers the ``/start``
    and ``/post`` command handlers and the error handler, schedules ``post``
    once through the job queue, then calls ``start_polling()`` followed by
    ``idle()``.
    """
    updater = Updater(config.token)

    dispatcher = updater.dispatcher
    dispatcher.add_handler(CommandHandler("start", start))
    dispatcher.add_handler(CommandHandler("post", post))
    dispatcher.add_error_handler(error)

    # Queue a single run of post; the second argument (0) is the ``when``
    # value passed to run_once. The returned job handle is not needed.
    updater.job_queue.run_once(post, 0)

    updater.start_polling()
    updater.idle()
# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()