# LibrusBot

This notebook contains essentially the same code as the project's .py files.

In [None]:
import chromedriver_autoinstaller
import json
import os
import pathlib
import platform
import requests
import sys
import time

from bs4 import BeautifulSoup
from datetime import datetime
from datetime import timedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Constants
# Credential files (JSON) read further down in the notebook.
USER_CREDS_PATH = "sample_user_creds.json"
TG_CREDS_PATH = "sample_tg_creds.json"

# Timeout handed to every WebDriverWait in the browser helpers.
DELAY = 10
# Pause (passed to time.sleep) after each Telegram request.
SEC_BETWEEN_MSGS = 5

# Pages visited by the bot.
URL_LOGIN = "https://portal.librus.pl/rodzina/synergia/loguj"
URL_OGLOSZENIA = "https://synergia.librus.pl/ogloszenia"
URL_WIADOMOSCI = "https://synergia.librus.pl/wiadomosci"

# Inits
# Timestamp of this run, in the same format as the stored "last checked" value.
new_last_checked = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

In [None]:
# BROWSER
def element_located(xpath, click=False, type_text=None, switch_iframe=False):
    """Wait for the element matching *xpath* and optionally act on it.

    Args:
        xpath: XPath expression of the element to wait for.
        click: When true, click the element.
        type_text: When non-empty, select the field's current content and
            type this text over it.
        switch_iframe: When true, switch the browser context into the
            element (expected to be an iframe).

    Raises:
        SystemExit: If the element does not appear within ``DELAY`` or any
            of the requested actions fails. The exit carries a diagnostic
            message (and therefore a non-zero status) instead of ending
            the run silently.
    """
    try:
        element = WebDriverWait(browser, DELAY).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )
        if click:
            element.click()
        if type_text:
            # Ctrl+A selects the existing content so the typed text replaces it.
            element.send_keys(Keys.CONTROL + "a")
            element.send_keys(type_text)
        if switch_iframe:
            browser.switch_to.frame(element)
    except Exception as exc:
        sys.exit(f"element_located failed for XPath {xpath!r}: {exc!r}")


def all_elements_located(xpath):
    """Return all elements matching *xpath* once they are present.

    Args:
        xpath: XPath expression of the elements to wait for.

    Returns:
        Whatever ``WebDriverWait.until`` yields for
        ``presence_of_all_elements_located`` (an iterable of elements).

    Raises:
        SystemExit: If the wait fails; the exit carries a diagnostic message
            (non-zero status) instead of ending the run silently.
    """
    try:
        return WebDriverWait(browser, DELAY).until(
            EC.presence_of_all_elements_located((By.XPATH, xpath))
        )
    except Exception as exc:
        sys.exit(f"all_elements_located failed for XPath {xpath!r}: {exc!r}")


def page_loaded(xpath):
    """Tell whether the element matching *xpath* showed up within ``DELAY``.

    Returns True when the wait succeeds and False when it raises anything.
    """
    try:
        WebDriverWait(browser, DELAY).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )
    except Exception:
        return False
    else:
        return True


def grab_attribute(xpath, attribute):
    """Return *attribute* of the element matching *xpath*.

    Args:
        xpath: XPath expression of the element to wait for.
        attribute: Name passed to the element's ``get_attribute``
            (e.g. ``"innerHTML"`` or ``"href"``).

    Raises:
        SystemExit: If the element does not appear within ``DELAY`` or the
            attribute cannot be read; the exit carries a diagnostic message
            (non-zero status) instead of ending the run silently.
    """
    try:
        element = WebDriverWait(browser, DELAY).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )
        return element.get_attribute(attribute)
    except Exception as exc:
        sys.exit(
            f"grab_attribute failed for XPath {xpath!r} "
            f"(attribute {attribute!r}): {exc!r}"
        )

        
# TELEGRAM
def telegram_bot_sendtext(bot_message, bot_token, bot_chat_id):
    """Send *bot_message* to a Telegram chat, then pause.

    The query parameters are passed via ``params`` so that ``requests``
    URL-encodes them; concatenating the raw text into the URL cut the
    message off at characters such as ``&`` or ``#``.

    Args:
        bot_message: Text to send (sent with ``parse_mode=Markdown``).
        bot_token: Bot token, placed in the API URL.
        bot_chat_id: Target chat id.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    params = {
        "chat_id": bot_chat_id,
        "parse_mode": "Markdown",
        "text": bot_message,
    }
    requests.get(url, params=params)
    # Space out consecutive requests.
    time.sleep(SEC_BETWEEN_MSGS)

    
def telegram_bot_sendfile(files, bot_token, bot_chat_id):
    """POST *files* to the bot's ``sendDocument`` endpoint, then pause.

    Args:
        files: Mapping handed to ``requests.post(files=...)``.
        bot_token: Bot token, placed in the API URL.
        bot_chat_id: Target chat id, appended as the ``chat_id`` query value.
    """
    endpoint = "".join(
        (
            "https://api.telegram.org/bot",
            bot_token,
            "/sendDocument?chat_id=",
            bot_chat_id,
        )
    )
    requests.post(endpoint, files=files)
    # Space out consecutive requests.
    time.sleep(SEC_BETWEEN_MSGS)


def split_in_chunks(long_msg, characters=3000):
    """Split *long_msg* into pieces no longer than *characters*.

    Pieces are cut at the last space inside the window (the space itself is
    dropped). When the window contains no usable space the text is cut hard
    at *characters*, so the function always makes progress. The work is done
    iteratively, so very long messages cannot exhaust the recursion limit.

    Args:
        long_msg: Text to split.
        characters: Window size; text shorter than this is returned whole.

    Returns:
        List of chunks. Empty input yields ``[""]``; an empty trailing
        remainder is not emitted.

    Raises:
        ValueError: If *characters* is smaller than 1.
    """
    if characters < 1:
        raise ValueError("characters must be at least 1")

    chunks = []
    remaining = long_msg
    while len(remaining) >= characters:
        sp_idx = remaining.rfind(" ", 0, characters)
        if sp_idx > 0:
            chunks.append(remaining[:sp_idx])
            remaining = remaining[sp_idx + 1:]
        else:
            # No space to break on (or only a leading one): cut hard.
            chunks.append(remaining[:characters])
            remaining = remaining[characters:]

    if remaining or not chunks:
        chunks.append(remaining)
    return chunks

In [None]:
# Grab User creds
# The file holds a list of user entries; the entry at index 1 is used.
# JSON is read explicitly as UTF-8 so non-ASCII values are not decoded with
# the platform's default encoding.
with open(USER_CREDS_PATH, "r", encoding="utf-8") as f:
    user_creds = json.load(f)[1]

# Grab TG creds
with open(TG_CREDS_PATH, "r", encoding="utf-8") as f:
    tg_creds = json.load(f)

In [None]:
# Make a chromedriver available before the browser is started
# (presumably downloads a matching version when missing - confirm in the
# chromedriver_autoinstaller documentation).
chromedriver_autoinstaller.install()

In [None]:
# Chrome options
# First three letters of the OS name: 'win', 'lin' or 'dar' (macOS).
this_os = platform.system()[:3].lower()

# Location of Chrome's "Default" profile. The path is assembled from its
# components so the separators are right on every OS (the Linux path used
# Windows backslashes before).
if this_os == 'win':
    chrome_userdata = os.path.join(
        os.environ["LOCALAPPDATA"], "Google", "Chrome", "User data", "Default"
    )
elif this_os == 'lin':
    chrome_userdata = os.path.join(
        os.environ["HOME"], ".config", "google-chrome", "Default"
    )
elif this_os == 'dar':
    chrome_userdata = os.path.join(
        os.environ["HOME"],
        "Library", "Application Support", "Google", "Chrome", "Default",
    )
else:
    # Fail with a clear message instead of a NameError further down.
    raise RuntimeError(f"Unsupported operating system: {platform.system()!r}")

options = webdriver.ChromeOptions()

options.add_experimental_option("detach", True)
options.add_experimental_option("useAutomationExtension", False)
options.add_experimental_option("excludeSwitches", ["enable-automation"])
# options.add_argument('--headless')
# options.add_argument('--no-sandbox')
options.add_argument(f"user-data-dir={chrome_userdata}")
prefs = {
    # Turn off Chrome's "save password" prompt.
    "credentials_enable_service": False,
    "profile.password_manager_enabled": False,
    # Attachments are downloaded here and picked up from here later on.
    "download.default_directory": user_creds["download path"],
}
options.add_experimental_option("prefs", prefs)

In [None]:
# Start Chrome with the options built above, maximise its window and open
# the Librus login page. `browser` is the global driver used by all helpers.
browser = webdriver.Chrome(options=options)
browser.maximize_window()
browser.get(URL_LOGIN)

In [None]:
# Navigate to login menu: click the dropdown toggle in the top bar, then the
# second "synergia" item of the opened dropdown.
for login_nav_xpath in (
    "//a[@class='btn btn-third btn-synergia-top btn-navbar dropdown-toggle']",
    "//div[@class='dropdown-menu dropdown-menu--wide dropdown-menu--gray show']"
    "/a[@class='dropdown-item dropdown-item--synergia'][2]",
):
    element_located(login_nav_xpath, click=True)

In [None]:
# Switch to iframe: the login form lives inside the 'caLoginIframe' frame,
# so the browser context is moved into it before the fields are filled in.
element_located("//iframe[@id='caLoginIframe']", switch_iframe=True)

In [None]:
# Enter login data and click "Zaloguj"
element_located("//input[@id='Login']", type_text=user_creds["login"])
element_located("//input[@id='Pass']", type_text=user_creds["password"])
element_located("//button[@id='LoginBtn']", click=True)

# Leave the login iframe again.
browser.switch_to.default_content()

# The page footer serves as the "page finished loading" marker. Stop with a
# message (non-zero exit status) rather than exiting silently.
if not page_loaded("//div[@id='footer']"):
    sys.exit(
        f"Page footer not found within {DELAY}s after logging in - "
        "the login may have failed or the page did not load."
    )

# Ogłoszenia

In [None]:
# Go to "Ogłoszenia" (announcements) page
browser.get(URL_OGLOSZENIA)

# The page footer serves as the "page finished loading" marker. Stop with a
# message (non-zero exit status) rather than exiting silently.
if not page_loaded("//div[@id='footer']"):
    sys.exit(f"Page did not load within {DELAY}s: {URL_OGLOSZENIA}")

In [None]:
# grab html
# Take the inner HTML of the 'container-background' div and parse it; the
# announcement tables are read from `soup` in the next cell.
tb_html = grab_attribute("//div[@class='container-background']", "innerHTML")
soup = BeautifulSoup(tb_html, "html.parser")

In [None]:
# Moment of the previous run, as stored in the user's credentials entry.
last_checked = datetime.strptime(user_creds["last checked"], "%Y-%m-%d %H:%M:%S")

# create Ogłoszenia DICT
ogloszenia = dict()
ogl_count = 0

for tbl in soup.find_all("table"):
    # Each announcement is one table: the header row holds the title, the
    # body rows hold (in this order) author, publication date and content.
    author_row = tbl.tbody.tr
    date_row = author_row.find_next_sibling()
    content_row = date_row.find_next_sibling()

    dt_issued = date_row.td.text
    dt_issued_obj = datetime.strptime(dt_issued.strip(), "%Y-%m-%d")

    # Announcements carry a date only (parsed as midnight). Comparing that
    # with the full `last_checked` timestamp skipped everything published on
    # the day of the last check, so dates are compared instead. Same-day
    # announcements may be reported again; none are missed.
    if dt_issued_obj.date() >= last_checked.date():
        ogl_count += 1

        # o - for shorter reference to one item, less typing
        o = ogloszenia[f"item_{ogl_count}"] = dict()

        o["Tytuł"] = tbl.thead.tr.td.text
        o["Dodał"] = author_row.td.text
        o["Data publikacji"] = dt_issued
        o["Treść"] = content_row.td.text

In [None]:
# create Ogłoszenia TG message from Dict
# Two header messages: who the report is for and how many announcements follow.
tg_ogloszenia = [
    f"= = = {user_creds['name']}, rodzic = = =",
    f"- - - OGŁOSZENIA: {ogl_count} - - -",
]

# One "key: value" line per field, each announcement closed by a footer.
ogl_parts = []
for item_count, item in enumerate(ogloszenia.values(), start=1):
    ogl_parts.extend(f"{key}: {value}\n" for key, value in item.items())
    ogl_parts.append(
        f"\n- - - Koniec ogłoszenia {item_count}/{ogl_count}. - - -\n\n\n"
    )
all_ogloszenia = "".join(ogl_parts)

In [None]:
# Queue the announcements body only when there is one: with no new
# announcements the text is empty, and queuing it would trigger a pointless
# send (plus the pause that follows every send).
if all_ogloszenia:
    tg_ogloszenia.append(all_ogloszenia)

# Wiadomości - odebrane

In [None]:
# Go to "Wiadomości" (messages) page
browser.get(URL_WIADOMOSCI)

# The page footer serves as the "page finished loading" marker. Stop with a
# message (non-zero exit status) rather than exiting silently.
if not page_loaded("//div[@id='footer']"):
    sys.exit(f"Page did not load within {DELAY}s: {URL_WIADOMOSCI}")

In [None]:
# grab html
# Take the inner HTML of the 'decorated stretch' table and keep only its
# <tbody>; the rows are iterated in the next cell.
tb_html = grab_attribute("//table[@class='decorated stretch']", "innerHTML")
soup = BeautifulSoup(tb_html, "html.parser").tbody

In [None]:
# Create "Wiadomości" (messages) dict
wiadomosci = dict()
wiad_count = 0


def _xpath_literal(text):
    """Return *text* as an XPath 1.0 string literal, whatever quotes it holds."""
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    # XPath 1.0 has no escape character: join the apostrophe-free pieces
    # with concat(), supplying each apostrophe as a double-quoted literal.
    pieces = ", \"'\", ".join(f"'{part}'" for part in text.split("'"))
    return f"concat({pieces})"


def _top_level_files(directory):
    """Return the names of the files directly inside *directory*."""
    # os.walk yields nothing for a missing directory, hence the default.
    return next(os.walk(directory), (directory, [], []))[2]


for row in soup.find_all("tr"):
    # Cells are reached by counting siblings of the row's first <td>; the
    # counts depend on the exact markup of the inbox table.
    wyslano = row.td.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling.text
    wyslano_dt_obj = datetime.strptime(wyslano, "%Y-%m-%d %H:%M:%S")

    if wyslano_dt_obj < last_checked:
        # The first message older than the last check ends the scan
        # (assumes the inbox lists the newest messages first - TODO confirm).
        break

    wiad_count += 1

    # w - for shorter reference to one item
    w = wiadomosci[f"item_{wiad_count}"] = dict()

    nadawca = row.td.next_sibling.next_sibling.next_sibling.next_sibling
    w["Nadawca"] = nadawca.text.strip()
    temat = nadawca.next_sibling
    w["Temat"] = temat.text.strip()
    w["Wysłano"] = wyslano

    # An <img> in the second cell marks a message with attachments.
    w["Załącznik"] = "Nie" if row.td.next_sibling.next_sibling.img is None else "TAK"

    # Grab hyperlinks
    # The subject is inserted as a proper XPath literal so that subjects
    # containing an apostrophe no longer break the expression.
    # NOTE(review): the first link whose text contains the subject is used;
    # messages sharing a subject would presumably resolve to the same link.
    w["Link"] = grab_attribute(
        f"//a[contains(text(), {_xpath_literal(w['Temat'])})]", "href"
    )

    # Go to email page
    browser.get(w["Link"])

    # Grab inner html & Treść
    # Separate names are used from here on so that the inbox `soup` and
    # `tb_html` stay intact and this cell can be run again.
    wiad_html = grab_attribute("//div[@class='container-message-content']", "innerHTML")
    w["Treść"] = BeautifulSoup(wiad_html, "html.parser").text

    if w["Załącznik"] == "TAK":
        downloaded_files = list()
        download_dir = user_creds["download path"]

        # Get files names (every row except the "Pliki:" caption row)
        files_html = grab_attribute("//table[@class='stretch container-message']/tbody/tr/td[2]/table[3]/tbody", "innerHTML")
        files_soup = BeautifulSoup(files_html, "html.parser")
        w["Pliki"] = [
            file_row.td.text.strip()
            for file_row in files_soup.find_all("tr")
            if "Pliki:" not in file_row.td.text
        ]

        # Download attachments
        dl_buttons = all_elements_located("//img[@src='/assets/img/homework_files_icons/download.png']")

        for button in dl_buttons:
            files_before = set(_top_level_files(download_dir))

            button.click()

            # Poll the download directory until a new file has appeared and
            # no partial (.crdownload) file is left.
            while True:
                files_now = _top_level_files(download_dir)
                new_files = set(files_now) - files_before
                if new_files and not any(".crdownload" in name for name in files_now):
                    downloaded_files.append(os.path.join(download_dir, new_files.pop()))
                    break
                time.sleep(1)

            # Close the second window and return to the first one
            # (presumably each download opens in a new window - TODO confirm).
            browser.switch_to.window(browser.window_handles[1])
            browser.close()
            browser.switch_to.window(browser.window_handles[0])

        # and lastly add the list of downloaded filepaths to the message entry
        w["Pobrane"] = downloaded_files

    browser.back()


# Send all to TG

In [None]:
# Ogłoszenia
# Every queued text is cut into chunks and sent chunk by chunk.
for og in tg_ogloszenia:
    for m in split_in_chunks(og):
        telegram_bot_sendtext(m, tg_creds["bot_token"], tg_creds["bot_chat_id"])

In [None]:
# Wiadomości: send a header message with the number of collected messages
telegram_bot_sendtext(
    f"- - - WIADOMOŚĆI: {len(wiadomosci)} - - -",
    tg_creds["bot_token"],
    tg_creds["bot_chat_id"]
)

In [None]:
# Send every collected message: the text first (in chunks), then each
# downloaded attachment as a document.
for idx, value in enumerate(wiadomosci.values(), start=1):
    has_attachment = value["Załącznik"] == "TAK"

    msg = ""
    msg += f"Nadawca: {value['Nadawca']}"
    msg += f"\nTemat: {value['Temat']}"
    msg += f"\nWysłano: {value['Wysłano']}"
    msg += f"\n\nTreść: {value['Treść']}"
    msg += f"\n\n- - - Koniec wiadomośći {idx}/{len(wiadomosci)} - - -"
    msg += f"\n\nZałącznik: {value['Załącznik']}"

    if has_attachment:
        # Append the number of downloaded files.
        msg += f" ({len(value['Pobrane'])})"

    for m in split_in_chunks(msg):
        telegram_bot_sendtext(
            m,
            tg_creds["bot_token"],
            tg_creds["bot_chat_id"]
        )

    if has_attachment:
        for file in value["Pobrane"]:
            # The context manager closes the file once the upload call has
            # returned; the handle used to be left open.
            with open(file, "rb") as document:
                telegram_bot_sendfile(
                    {"document": document},
                    tg_creds["bot_token"],
                    tg_creds["bot_chat_id"]
                )

In [None]:
# quit() rather than close(): close() only closes the current window, while
# quit() closes every window and ends the WebDriver session.
browser.quit()