Skip to content
This repository has been archived by the owner on Nov 12, 2020. It is now read-only.


Allow multiple calendar posts and allow loading external images from calendar pages
Browse files Browse the repository at this point in the history
  • Loading branch information
paolobarbolini committed Sep 14, 2017
1 parent ca0c43d commit be74a89
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 12 deletions.
20 changes: 11 additions & 9 deletions telegramschoolbot/
Expand Up @@ -38,7 +38,7 @@ def query_main_page(self):
parsed_html = BeautifulSoup(response.text, "html.parser")

# Find the url of the calendar article
calendar_article_url = None
calendar_articles = []
left_content = parsed_html.find("div", {"id": "jsn-pleft"})
left_links = left_content.find_all("a")
for link in left_links:
Expand All @@ -50,8 +50,7 @@ def query_main_page(self):
if not ("Orario" in text and "lezioni" in text):

calendar_article_url = urllib.parse.urljoin(self.config["school_website"], link.get("href"))
calendar_articles.append(urllib.parse.urljoin(self.config["school_website"], link.get("href")))

# Generate the list of posts
posts = []
Expand All @@ -62,7 +61,7 @@ def query_main_page(self):
url = urllib.parse.urljoin(self.config["school_website"], post_urls[i].find("a").get("href"))
posts.append(models.Post(url=url, title=title))

return calendar_article_url, posts
return calendar_articles, posts

def query_calendar_article(self, url):
Expand Down Expand Up @@ -192,16 +191,19 @@ def update_posts_table_and_notify(self, bot, posts):

def run(self, bot):
calendar_article_url, posts = self.query_main_page()
calendar_articles, posts = self.query_main_page()

# This default makes all of the classes, teachers and classrooms
# go away if we can't find the page listing them
calendar_pages = []
if calendar_article_url is not None:
calendar_url = self.query_calendar_article(calendar_article_url)
for article in calendar_articles:
calendar_url = self.query_calendar_article(article)

if calendar_url is None:

if calendar_url is not None:
calendar_pages = self.query_calendar(calendar_url)
calendar_pages = self.query_calendar(calendar_url)

self.update_posts_table_and_notify(bot, posts)
11 changes: 8 additions & 3 deletions telegramschoolbot/
Expand Up @@ -9,6 +9,7 @@
from bs4 import BeautifulSoup
from datetime import datetime
from urllib.parse import urlparse
import urllib

import hashlib
import os
Expand All @@ -35,11 +36,15 @@ def send_cached_photo(bot, message, file_id, caption):"sendPhoto", args)

def prettify_page(html):
def prettify_page(page_url, html):
parsed_html = BeautifulSoup(html, "html.parser")

# Find all images
for img in parsed_html.find_all("img"):
img["src"] = urllib.parse.urljoin(page_url, img["src"])

# Remove the default styles
for p in parsed_html.find_all('style'):
for p in parsed_html.find_all("style"):

# Custom css
Expand Down Expand Up @@ -97,7 +102,7 @@ def send_page(db, bot, message, page, caption):

# The page did change, prepare the html file for wkhtmltoimage
html_path = "/tmp/tsb-body-%s.html" % body_md5
prettified_body = prettify_page(response.text)
prettified_body = prettify_page(page.url, response.text)
with open(html_path, "w") as f:

Expand Down

0 comments on commit be74a89

Please sign in to comment.