Permalink
Browse files

Allow multiple calendar posts and allow loading external images from calendar pages
  • Loading branch information...
paolobarbolini committed Sep 14, 2017
1 parent ca0c43d commit be74a89b9d8e3436716c9e31785694f1b4d8b7e2
Showing with 19 additions and 12 deletions.
  1. +11 −9 telegramschoolbot/tasks.py
  2. +8 −3 telegramschoolbot/utils.py
@@ -38,7 +38,7 @@ def query_main_page(self):
parsed_html = BeautifulSoup(response.text, "html.parser")

# Find the url of the calendar article
calendar_article_url = None
calendar_articles = []
left_content = parsed_html.find("div", {"id": "jsn-pleft"})
left_links = left_content.find_all("a")
for link in left_links:
@@ -50,8 +50,7 @@ def query_main_page(self):
if not ("Orario" in text and "lezioni" in text):
continue

calendar_article_url = urllib.parse.urljoin(self.config["school_website"], link.get("href"))
break
calendar_articles.append(urllib.parse.urljoin(self.config["school_website"], link.get("href")))

# Generate the list of posts
posts = []
@@ -62,7 +61,7 @@ def query_main_page(self):
url = urllib.parse.urljoin(self.config["school_website"], post_urls[i].find("a").get("href"))
posts.append(models.Post(url=url, title=title))

return calendar_article_url, posts
return calendar_articles, posts


def query_calendar_article(self, url):
@@ -192,16 +191,19 @@ def update_posts_table_and_notify(self, bot, posts):


def run(self, bot):
calendar_article_url, posts = self.query_main_page()
calendar_articles, posts = self.query_main_page()

# This default makes all of the classes, teachers and classrooms
# go away if we can't find the page listing them
calendar_pages = []
if calendar_article_url is not None:
calendar_url = self.query_calendar_article(calendar_article_url)
for article in calendar_articles:
calendar_url = self.query_calendar_article(article)

if calendar_url is None:
continue

if calendar_url is not None:
calendar_pages = self.query_calendar(calendar_url)
calendar_pages = self.query_calendar(calendar_url)
break

self.update_pages_table(calendar_pages)
self.update_posts_table_and_notify(bot, posts)
@@ -9,6 +9,7 @@
from bs4 import BeautifulSoup
from datetime import datetime
from urllib.parse import urlparse
import urllib

import hashlib
import os
@@ -35,11 +36,15 @@ def send_cached_photo(bot, message, file_id, caption):
bot.api.call("sendPhoto", args)


def prettify_page(html):
def prettify_page(page_url, html):
parsed_html = BeautifulSoup(html, "html.parser")

# Find all images
for img in parsed_html.find_all("img"):
img["src"] = urllib.parse.urljoin(page_url, img["src"])

# Remove the default styles
for p in parsed_html.find_all('style'):
for p in parsed_html.find_all("style"):
p.decompose()

# Custom css
@@ -97,7 +102,7 @@ def send_page(db, bot, message, page, caption):

# The page did change, prepare the html file for wkhtmltoimage
html_path = "/tmp/tsb-body-%s.html" % body_md5
prettified_body = prettify_page(response.text)
prettified_body = prettify_page(page.url, response.text)
with open(html_path, "w") as f:
f.write(prettified_body)

0 comments on commit be74a89

Please sign in to comment.