### Imports

In [1]:
import cv2
import base64
import ffmpeg
import ffmpeg._probe
import getpass
import glob
import json
import logging
import ntplib
import numpy as np
import os
import pandas as pd
import random
import re
import requests
import shutil
import subprocess
import sys
import time
import urllib.request
import winreg

from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from importlib import reload
from tqdm import tqdm

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options as ChromeOptions

from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtNetwork import *
from PyQt5.QtWidgets import *
from PyQt5.QtMultimedia import *
from PyQt5.QtMultimediaWidgets import *

In [2]:
# Function to sleep while printing progress
def load_page(load_time=5):
    """Block for ``load_time`` seconds while printing a countdown on one line.

    The countdown is refreshed every half second using a carriage return, so
    it overwrites itself in the cell output.

    Args:
        load_time: Number of seconds to wait. Anything accepted by ``float()``;
            zero or negative values return immediately.
    """
    remaining = float(load_time)
    tick = 0.5
    while remaining > 0:
        # round() keeps the familiar "5.0 / 4.5 / ..." display while hiding
        # float noise on the shortened final tick.
        print(f'Loading for {round(remaining, 2)} seconds...', end='\r')
        # Clamp the final tick so the total wait never exceeds load_time.
        step = min(tick, remaining)
        time.sleep(step)
        remaining -= step
    # Trailing spaces blank out what is left of the longer countdown line.
    print('Loading finished                       ')

# Function to download file from url with progress
def url_retrieve(url, output_path, desc="Downloading"):
    """Download ``url`` to ``output_path`` while showing a tqdm progress bar.

    Args:
        url: Address passed to ``urllib.request.urlretrieve``.
        output_path: Destination file name for the download.
        desc: Label shown in front of the progress bar.
    """
    with tqdm(unit="B", unit_scale=True, desc=desc, miniters=1) as t:

        def report(blocknum, blocksize, totalsize):
            # urlretrieve calls the hook once before any data arrives
            # (blocknum == 0) and then after each block, so progress must be
            # derived from the absolute position rather than adding blocksize.
            if totalsize > 0:
                t.total = totalsize
            received = blocknum * blocksize
            if totalsize > 0:
                # The last block is usually partial; never show more than 100%.
                received = min(received, totalsize)
            t.update(received - t.n)

        urllib.request.urlretrieve(url, filename=output_path, reporthook=report)
    print(f"Download successful: {output_path}")

# Function to attempt a function x times
def tryn(func, attempts=5, pause=False, mute=False):
    """Call ``func()`` until it succeeds or ``attempts`` tries are used up.

    Args:
        func: Zero-argument callable to run.
        attempts: Maximum number of calls.
        pause: If true, wait for the user to press enter between attempts;
            otherwise sleep three seconds between attempts.
        mute: If true, suppress the "Trying ..." progress line. Exceptions
            raised by ``func`` are still printed.

    Returns:
        Whatever ``func`` returned on its first successful call, or ``None``
        when every attempt raised.
    """
    # Some callables (e.g. functools.partial objects) have no __name__.
    func_name = getattr(func, "__name__", "function")

    attempt = 0  # reported as-is in the failure message when attempts < 1
    for attempt in range(1, attempts + 1):
        if not mute:
            print(f"Trying {func_name}... (Attempt {attempt}/{attempts})")
        try:
            return func()
        except Exception as e:
            print(e)
            # Only wait when another attempt will actually follow.
            if attempt < attempts:
                if pause:
                    input("Press enter to retry...")
                else:
                    time.sleep(3)

    print(f"Failed after {attempt} attempts.")
    return None

# Input datetime object
def wait_until(target_time: datetime, fromServer=False):
    """Sleep until ``target_time`` is reached on the local clock.

    Args:
        target_time: Moment to wait for. It is compared against
            ``datetime.now()``, so it must be comparable with that value.
        fromServer: If true, ``target_time`` is interpreted on the clock of
            ``pool.ntp.org``: the offset between that server and this
            computer is measured once and subtracted from the target.

    A target that already lies in the past returns immediately instead of
    handing a negative duration to ``time.sleep``.
    """
    if fromServer:
        ntp_server = 'pool.ntp.org'
        ntp_client = ntplib.NTPClient()
        response = ntp_client.request(ntp_server)
        server_time = datetime.fromtimestamp(response.tx_time)
        diff = server_time - datetime.now()
        print(f'Difference between server and computer time: {diff}')
        # Translate the server-clock target into local-clock time.
        target_time = target_time - diff

    current_time = datetime.now()
    wait_time = target_time - current_time
    print(f"Current time: {current_time}")
    print(f"Target time: {target_time}")
    print(f"Wait time: {wait_time}")
    # time.sleep() raises ValueError for negative durations.
    time.sleep(max(wait_time.total_seconds(), 0.0))
    print(f"Current time: {datetime.now()}")

    if fromServer:
        print(f"Adjusted time: {datetime.now() + diff}")

# Human-readable size of the file at a given path
def get_sizestr(path, withUnit=True, setUnit='', dp=1):
    """Return the size of the file at ``path`` as a formatted string.

    Args:
        path: File whose size is looked up with ``os.path.getsize``.
        withUnit: Forwarded to ``get_sizestr_bytes``.
        setUnit: Forwarded to ``get_sizestr_bytes``.
        dp: Forwarded to ``get_sizestr_bytes``.

    Returns:
        The string produced by ``get_sizestr_bytes``, or the literal
        ``'File not found'`` when the path does not exist.
    """
    try:
        byte_count = os.path.getsize(path)
    except FileNotFoundError:
        return 'File not found'
    return get_sizestr_bytes(float(byte_count), withUnit, setUnit, dp)

# Human-readable size string for a raw byte count
def get_sizestr_bytes(size_bytes, withUnit=True, setUnit: str='', dp=1):
    """Format a byte count using binary (1024-based) units.

    Args:
        size_bytes: Size in bytes; converted with ``float()``.
        withUnit: Append the unit label (``'1.5 KB'``) when true, otherwise
            return only the number (``'1.5'``).
        setUnit: One of ``'B'``, ``'KB'``, ``'MB'``, ``'GB'``, ``'TB'`` to
            force that unit. The empty string scales up automatically while
            the value is at least 1024.
        dp: Number of decimal places in the result.

    Raises:
        ValueError: ``setUnit`` is neither empty nor a known unit.
    """
    unit_names = ['B', 'KB', 'MB', 'GB', 'TB']
    if setUnit != '' and setUnit not in unit_names:
        raise ValueError(f"Unit must be one of {unit_names}")

    value = float(size_bytes)
    last_step = len(unit_names) - 1

    if setUnit:
        # Fixed unit: divide once per step up the unit ladder.
        step = unit_names.index(setUnit)
        for _ in range(step):
            value /= 1024.0
    else:
        # Automatic unit: climb until the value fits or the units run out.
        step = 0
        while step < last_step and value >= 1024:
            value /= 1024.0
            step += 1

    number = f"{value:.{dp}f}"
    if withUnit:
        return f"{number} {unit_names[step]}"
    return number

# Compact timestamp string for a datetime object
def get_datetimestr(dt_object, format='%Y%m%d-%H%M'):
    """Render ``dt_object`` through its ``strftime`` method.

    Args:
        dt_object: Object exposing ``strftime`` (for example a ``datetime``).
        format: ``strftime`` pattern; the default yields ``YYYYMMDD-HHMM``.

    Returns:
        The formatted string.
    """
    formatted = dt_object.strftime(format)
    return formatted

In [3]:
class DriverToolkit:
    """Keep a local chromedriver in step with Chrome and drive a Selenium session.

    Windows-only: the installed Chrome version is read from the registry and
    the win64 chromedriver build is downloaded when the local one is missing,
    unusable or from a different major version.

    Attributes set at runtime:
        driver_path: Location of the chromedriver executable.
        driver: The ``webdriver.Chrome`` instance created by ``get_driver``.
        logs: The list returned by the most recent ``get_logs`` call.
    """

    # Initialize driver and directories
    def __init__(self, driver_path='./driver/chromedriver.exe'):
        """Create the working directories and make sure a matching driver exists.

        Args:
            driver_path: Where the chromedriver executable lives (or should be
                downloaded to).
        """
        os.makedirs('./driver', exist_ok=True)
        os.makedirs('./logs', exist_ok=True)

        self.driver_path = driver_path
        self.update_driver()

    # Check if chromedriver exists and up to date, if not download latest version
    def update_driver(self):
        """Download chromedriver unless the local copy matches Chrome's major version.

        Raises:
            Exception: Chrome's version could not be read from the registry.
            requests.RequestException: The version index could not be fetched.
        """
        # Get version of chrome installed
        try:
            # Raw string: "\G", "\C" and "\B" are invalid escape sequences.
            # The context manager closes the key even if the query fails.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Google\Chrome\BLBeacon") as reg_key:
                chrome_version, _ = winreg.QueryValueEx(reg_key, "version")
        except OSError as exc:
            raise Exception("Google Chrome not found/installed, download from https://www.google.com/intl/en_uk/chrome/") from exc
        chrome_milestone = chrome_version.split(".")[0]
        print("Google Chrome version:", chrome_version)

        # Check if chromedriver is present and up to date
        if os.path.exists(self.driver_path):
            try:
                version_output = subprocess.check_output([self.driver_path, '--version'])
                chromedriver_version = version_output.decode('utf-8').split()[1]
                driver_major = int(chromedriver_version.split('.')[0])
            except (OSError, subprocess.SubprocessError, UnicodeDecodeError, IndexError, ValueError) as e:
                # A driver that cannot report its version is treated like an
                # outdated one instead of terminating the whole process.
                print(e)
                print("Chromedriver version could not be read, downloading a fresh copy")
            else:
                print(f'Chromedriver version: {chromedriver_version}')
                if driver_major == int(chrome_milestone):
                    print("Chromedriver up to date")
                    return
                print("Chromedriver out of date")

        self._download_driver(chrome_milestone)

    # Download and unpack the chromedriver build for one Chrome milestone
    def _download_driver(self, chrome_milestone):
        """Fetch the win64 chromedriver for ``chrome_milestone`` into ``self.driver_path``."""
        url = "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json"
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        milestone = response.json()["milestones"][f"{chrome_milestone}"]

        cd_version = milestone["version"]
        downloads = milestone["downloads"]["chromedriver"]
        # Pick the win64 build explicitly; fall back to the last entry, which
        # is what this code relied on before.
        cd_url = next(
            (entry["url"] for entry in downloads if entry.get("platform") == "win64"),
            downloads[-1]["url"],
        )

        driver_dir = os.path.dirname(self.driver_path) or '.'
        os.makedirs(driver_dir, exist_ok=True)
        archive_path = os.path.join(driver_dir, 'chromedriver.zip')
        extracted_dir = os.path.join(driver_dir, 'chromedriver-win64')

        urllib.request.urlretrieve(cd_url, archive_path)
        try:
            shutil.unpack_archive(archive_path, driver_dir)
            shutil.move(os.path.join(extracted_dir, 'LICENSE.chromedriver'),
                        os.path.join(driver_dir, 'LICENSE.chromedriver'))
            # Install to the configured location, not a hard-coded one.
            shutil.move(os.path.join(extracted_dir, 'chromedriver.exe'), self.driver_path)
        finally:
            # Leave no half-extracted download behind, even on failure.
            shutil.rmtree(extracted_dir, ignore_errors=True)
            if os.path.exists(archive_path):
                os.remove(archive_path)
        print(f"Downloaded Chromedriver version: {cd_version}")

    # Read the saved window size and position
    @staticmethod
    def _load_window_geometry(logs_path='./logs/window_log.txt'):
        """Return ``([width, height], [x, y])`` from the window log.

        Falls back to ``([1050, 765], [10, 10])`` when the file is missing,
        unreadable, or does not hold at least four integers.
        """
        try:
            with open(logs_path, 'r') as f:
                values = [int(token) for token in f.read().split()]
            width, height, x, y = values[:4]
        except (OSError, ValueError):
            return [1050, 765], [10, 10]
        return [width, height], [x, y]

    # Launch driver with options
    def get_driver(self, headless=False, adblock=False, adblock_path='./driver/adblock.crx', download_path=None):
        """Start Chrome through Selenium and return the driver.

        Args:
            headless: Run Chrome without a visible window.
            adblock: Load the extension found at ``adblock_path``.
            adblock_path: Path to the packed ``.crx`` extension.
            download_path: Chrome's default download directory; defaults to
                the current working directory.

        Returns:
            The ``webdriver.Chrome`` instance (also stored as ``self.driver``).

        Raises:
            FileNotFoundError: ``adblock`` is true but ``adblock_path`` is missing.
        """
        # Set options
        options = ChromeOptions()
        if headless:
            options.add_argument('headless')
        if adblock:
            if not os.path.exists(adblock_path):
                raise FileNotFoundError(f'adblock.crx not found at {adblock_path}')
            # Load the extension that was just checked, not a fixed path.
            options.add_extension(adblock_path)

        # Set capabilities: performance logging feeds get_logs()
        options.set_capability("goog:loggingPrefs", {'performance': 'ALL'})

        # Set preferences
        if download_path is None:
            download_path = os.getcwd()
        prefs = {
            "download.default_directory": download_path,
        }
        options.add_experimental_option("prefs", prefs)

        # Set window size and position
        windowSize, windowPos = self._load_window_geometry()

        # Launch driver
        service = Service(self.driver_path)
        self.driver = webdriver.Chrome(service=service, options=options)
        self.driver.set_window_size(windowSize[0], windowSize[1])
        self.driver.set_window_position(windowPos[0], windowPos[1])

        # Manage adblock: close the extra tab, if one was opened after launch
        if adblock:
            time.sleep(5)
            if len(self.driver.window_handles) > 1:
                self.driver.switch_to.window(self.driver.window_handles[1])
                self.driver.close()
                self.driver.switch_to.window(self.driver.window_handles[0])

        return self.driver

    # Save window size and position
    def save_window(self, logs_path='./logs/window_log.txt'):
        """Write the current window width, height, x and y to ``logs_path``, one per line."""
        windowSize = self.driver.get_window_size()
        windowPos = self.driver.get_window_position()
        os.makedirs(os.path.dirname(logs_path) or '.', exist_ok=True)
        with open(logs_path, 'w') as f:
            f.write(f'{windowSize["width"]}\n{windowSize["height"]}\n{windowPos["x"]}\n{windowPos["y"]}')
        print(f'Saved - Size: {windowSize["width"]}x{windowSize["height"]}\tPosition: {windowPos["x"]},{windowPos["y"]}')

    # Clear driver network logs
    def reset_logs(self):
        """Discard buffered performance-log entries by reading them once."""
        self.driver.get_log("performance")
        print("Logs cleared")

    # Get driver network logs
    def get_logs(self, json_path='./logs/network_log.json'):
        """Collect network events from the performance log and save them as JSON.

        Only entries whose ``method`` contains ``Network.response``,
        ``Network.request`` or ``Network.webSocket`` are kept.

        Args:
            json_path: File the filtered events are written to.

        Returns:
            The list of event dicts (also stored as ``self.logs``). The list
            ends with an empty dict, as it always has, so existing callers
            that skip or index the last element keep working.
        """
        wanted = ("Network.response", "Network.request", "Network.webSocket")

        network_logs = []
        for log in self.driver.get_log("performance"):
            network_log = json.loads(log["message"])["message"]
            if any(method in network_log["method"] for method in wanted):
                network_logs.append(network_log)
        network_logs.append({})  # historical trailing sentinel, see docstring

        os.makedirs(os.path.dirname(json_path) or '.', exist_ok=True)
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(network_logs, f)

        self.logs = network_logs
        return self.logs

In [4]:
# Build the toolkit (its constructor calls update_driver(), which checks the
# local chromedriver and downloads one if needed), then open a visible,
# non-headless Chrome window with the adblock extension loaded.
dtk = DriverToolkit()
driver = dtk.get_driver(adblock=True, headless=False)

Google Chrome version: 116.0.5845.111
Chromedriver version: 116.0.5845.96
Chromedriver up to date


In [5]:
# Raises SystemExit in this cell.
# NOTE(review): presumably a deliberate stop so that "Run All" halts before the cells below - confirm.
sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Store the browser window's current size and position in ./logs/window_log.txt
# (save_window's default path); get_driver() reads that file on its next launch.
dtk.save_window()

### Core

### GUI

In [None]:
class MainWindow(QWidget):
    """Top-level window of the notebook GUI.

    Currently an empty shell: it installs a vertical layout, sets the window
    icon, style and title, shows itself, and quits the Qt application when
    it is closed.
    """

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Build the window: layout, embedded icon, Fusion style, title, then show it."""
        # Passing the widget to the layout constructor already installs the
        # layout on it, so no separate setLayout() call is needed.
        box = QVBoxLayout(self)

        # Window icon, embedded as a base64-encoded PNG. Written as adjacent
        # bytes literals (joined at compile time) because a single quoted
        # literal cannot continue across physical lines.
        icon_bytes = base64.b64decode(
            b'iVBORw0KGgoAAAANSUhEUgAAAIAAAACACAMAAAD04JH5AAABnlBMVEVHcEyAgIB+fn5+fn5+fn6AgIB/f3+EhISAgICBgYF9fX2AgIB9fX19fX2AgIB/f39/f39+fn5+fn5/f39+fn6AgID////krVzlyVz+/v6mpqb9/f2Hh4e/v7/J1lzkklvAwcH5+fnPz87X19fx8fGMjIyRkZGKioro6Oi1tLXc3N3TuEvg4OBc1q3u7u6oqKj39/ewsLDq6uvXhVDT09L7+/vLy8uUlJT09PSamprExMLj4+SioqJKxZydnp6EhIS7vbzMekS5ubnHx8aPj4/hqlrPl0aXl5ekpKW9tqi3t7jBvbPCrn3ix1uqqqlLvJjMum/JpXHF0VzaqmLbpFPXwGTVnkzfkl+6o5HQtUm9qIq/mYHbwFK3t5u7spfexV9exsbNskW5raDNlG6/ymaet7K0u3rTqGiAgICxtoqyv0i4wW6PtaqnoZmrvbnXkmW7yE5wwaa8sox2dsdxuLhlyaeWi26wj3NpppauhVpf0KrUgkt/u6fBiGCCsrJnZ6xUzqWpm4aVkYGDnpavpWSIhH2emXt3d5nErlXToVdCceMxAAAAFnRSTlMAFmfdzydJ+AvrxTCzlR54WYinOoIFpiZIZgAAD41JREFUeNq8WflP22oWfWQlIeytIbGJEtuJF6zEjmI7iSM7IqZkgYiwVAUBVSvEUoZR1Up03g+jUXnd5r+ez0sSf58DBV4794ckBuJ7fO85516bP/54WjyfnAkvRiJzICKRxfDM5PM//m8xGY4EphPxYCwWdSMWC8YT04FIePL3J58KTMdjUbOa4jIKxQuarmsCT5EZLlU1o7H4dGDqN4KYiMzHY0xVUviCiGNOpFLuB1wsCIqkM7H4fGTit1z74nwoqkokK2KewFMp3HsssqSkRkPzi7+6DhOBRCwvUXQSgwMFACJJU1w+lgjM/ML04dkQ0SJpzB9jAFhBky0i9Cz8q9I/C+YkQcawhwPAMFmQcsFfAmFiNpjrsEkMexwA0Aq2kwvO/l0+Ts6FGI7F7ox7AIBgOSY097foOLUQTQlJ7KkAsKSQii5MPf3yA0GVzGLY0wFgWJZUg4EnFiG8EJUKY+hV4MvigwFgWEGKLjyJjJFQnvRRP8uS7cP+q9IjAGA4mQ9FHl/+2VgLJR9esrLvLi31qccAAGRsxWYf2YaJ6WhHRC5e6NrZQZw3hyxLpZI/B5AUO9HpRwlyJsEocPmzfHvzfMmN3XZWLAnlZoOTTFPiGs2yUBLvrQSuMIlHeHM4blIw8UD63UH6pfW9z62NHMGYalVnGL2qmgyR2yhmePoeEJQZfzAVp0KqAPewO0q/vvfyxYs/qx2FZ+mKKLZaolihWV7p1E3CLCol+S4Eghqaemh+BspfUQ7PvdlfvNzrk34OyDSfqTO5YrlyFwLmYQjCIYZY8VyH1u6vD9KD7HvWwW43OVYFWbapE2qmNHY8rRBM6AFdmImrwsoIQZY8dKu//tK6dvfziXiXDLO8xJiNwrj8K4Ia/ykTJxKmYP+xg4Du9kdX/3J9yMPD0t0+YI1Bs1bx55cxwUz8RI2T0ww1/HNwrpqbH04/sqI7jIiViCqF+/IDLTDT9zvSbFSBvlA62XWrv7fkjYEV3eWESb5OcLQvP4Yp0dl7/T/WkeGvCIfr1uXvrUP5gRXJP7FisclUeV9+TO7EIvcJoCX6ira5h16+TQL6p7NA05mmjOQHwFp3S2FyIc/62lbR/3y57su/tCmMASCTvHeAVDiCE5H8gB/5hbtoEIiSKHFxusdcn/vzL7lWBANgL8+Ovcuz3CR6NA7nxzAyGrjDAYOSjEqHq+fKdHsMAteKYADKVXrn6ljxQCjn6h0kP4ZLwanxDVBR+8A7FwTQG3uy6wPgWhE0jsXjnXTagjDamDCKuOBQlhTUsU2Ygxtg21Cd'
            b'sOHzh34WACtKZkW6XqfFrItBOEvbsXPW1pLDIhJ13x0NGZ0bY4GhFLp/ir0c5TSwvOknwVqjp6t5wzBVvdcoW7eMtdO0G6eXijigEZXriehilwr5DXEWHoHWtzmi7FJYHjjiaCj++6talxoZVc00pLrKMNUOeZkexs7lyFDLBCf7BqPPjsJBDt2tmkRzeBKxe+5Jb43k/76l5cHtuVyxboqJL+93hiXoevTnnAgySi6ImsGzHLqC8gznsZHCW2gorrtW5FFBoffu3TsXwulxwaN/mWN4dFjknqEF6KAErOoVryOxr3bhoWhbkQdA6TK98x5AsBugQf5T0asoETtICXwFwDlGgz3RkgK4/KEt2lbkAQBMIG1BAEU4K8P+l9QYVItsbhaWgIQwgCJqqCuT8FSwrWgEwDGBdBoU4aqGo/5bIyiEBRIkhACBSKBSrYs+V76GpoJtRSMAAxNI77z7yomo/4r1KrKhCITHkCcTLUQoNYJHXXlNQqbCIesBkByawM7xGiGtof7Leytqn7GVGNnhYgwxwYKJtgRfuzDI0ttdZCsaAqDdDgACChhpXKyh/iuZiNGTscUhgPk8QtKGT5SiZBgruOb1ZGsrGgKgBh04I0G5DEMSfcJrIDLLz48oyMG/K5kpBJG8RpBWW6lDeCsaAJC7bgeuarJFGJJYQ92vYyLb+oiGkShC0YxxfQw/FFNAC60TZ5U+tBUNAJRcGz5tVxz+1QgFLQGTQYQWHSxn8yp8vZWN/Obr7RPPXNdy1qpgIRC7fe9WNABAXrkELLn8l6WchgpPhYVAq24PJuMI48rE9f7ysgWhMBDRBj1Qo2c9sazIeVQrth0KXvJD/dEbdYQGPJhtMKK4o4MpRANyz/ywtQxia/ukWUra04Qa+cFoPdlta1S1Sml0UjtzCejxHwodQVm9KCM6cDajAANTvpSrbi87sbX9oVbCCnkJ9ziSu56AsXDDMIbBMGaqZ1Pwqit6/AeX8ojwmrkSwgrHi6Z1uFaK3QEPBMlkIU8kN92h+KlpPa6nMvqNNQFO2wXI/3zCYxmYmGJ12qUAbDm9vNOBQWzfXGu4F0Gj1neGYp93OECdWWPwWMvA/tdBvCdbL8L25JAgHFMQg9A3vfm3jt7sf+hqsndXdqfCeTNpqQAHJrDz/qbXQPyXZRD7XTFhuSmxsO0CAkLW1D4E4GB1eWt/szt8XC03LozPu+6zIguAbQJXdeOige7fVRE5NTxhBNsJAkihMrm3r70Ajt4cWW8AAu9srXjDMFr2erJ0WLAAWCZw1e0YRsM30+GEtJlBRo7FwmkYJt5TYQqsHriH+5tt+86rTKysENVDx4qKqWQWmMDpsWT9tIwMdRXes+R6D/ex8HkC+afLBkIB0AHrzXp5vdmmxIJazOIrhmk5Yl8ppjBgAjuXLdD/bBG9tUF70NlA/uGTeA5EwCGDqLUNAbA6sOWBULQmJRh4ub61FbVSePM0faba/PMJj0QsRkGcgAMymAlmkJW9tw1rYGtYAevl6Mtn6zkYQHB7DrYivQg2gauc4fAfFR7LlBEWwoSvBWeACmEjLjM9SARHFgWGFbA4ub9tjUrQhdvdpcN8jzo7vXXzYxpiNRW1cS8gS4eLMQrxy7cQgIPVASVtIFurq86couWM8W29T6S6V98G+eVsPSUjrEvCMoDnAwW2okgUlkojD6nQ4eDW8paXEg6E0orx6ZzYuHTy47TQ5AoZxGokPXtfRXhgBKgPceoxDODITe50wnUFe1TWesanW+LWyMhZluy0Pn78qFEMvAU0VBHWHedzormohuGjSEobJ69REQwJ4AFgz6n8168XFxKVKX50QtAYyns2LJOnk56Ti7qU9P5aA/fpc1E95Q2T+Haw6ok30NHqgff44MvNxcVF7vv3/zjxXa8bVehsqlH3HrbA6PaG/mQABwc/fnz58vkWFIBoFYeR0p8CAG3B8ZgWeCfD/v725od2l6QUyTQMg2iIyVFNH9+CB5Bw'
            b'8HHrNUh986rbBFuYnNUaOmNwf+lrX6XRsvkEEqIyXMm3x8jQSv3hpFsrU6Y98mThn//Q80aG/ddfBe7memi4T5Chz4jM9j4E4Mf2/5q3lt7EkSCslfa+NyTWjgA/BsfiYcA2xtgMDkiTkNU4N5CsHSEQN0KEBANJACWTP77dbYNN2+DHZpRYOUXI/bm6uqr6q6/u/vlxo1KFqiJBWkyTUjl+tttNQP5J0+t1Vbl/fOXTTuCJH4j8ofjyzrE42uzXXyrPKe5r5XwBLL/YdRD/p5prPsW9Gis5lzQU+5PR5Z1t8X/RZstYQuPYt8VwMbObGtLMNMGS/MrY3jIJklEWJCNfOmYrP4DFxYKjE+HyXgsp9GRtDncyY9efzFQ3gU+k5a2xHXHJ0rG/IKmUPBYHjusWzYo4WQyH6zf+QOZvdHMCjJ+7BQgGYqKCxFeSlbXj2rm3v9IxFFx+uEPFl53/qLmuT6HtmdHWqK2+UglKMn9Rip0k3r6XOcsvJgUJFKX2+kR7qesbdPK5Qc1wHCFuUYpFojReO4M9IOBacPnhTgWlCCjLL5DTC5MnXZ/bfiauDKM2HjXjlOU8Kst9F5PuBcYXIU+iFvDzecR/tdtkGf6vuinq+hIdba78dVszag8DT68q4sXEdzUraznMUHAnS9PhLss4lF1arLO9JkHPAYCnGSE1e2yduh0DBH2rI8S8mvkvp9jZsRmjdBZ+/oH/K7W7eU1bvuj6y7StoVYlM3oACDLj21K8y2nA9Ry72NvXc6Z03H8qydekCZ9192+byWgO+giB7QjRr+c+gqLcyuG8Oe3rv6FdeYEWmIv7eE9bGYjgYSAScQiKAIoGI7e9FI3nHYVNEfhAcX7AT3TGCEHfUoUYFI2fpKrjNKVLUnm/gZojAEv3cAu3NoLM+KbcjUxSBdB0uBseaDrv+iAMIQBPs8OOpUujBxvB9udVdJrujz8rOFGJdw+kK1LG+38gDCEAxanHgMARIQLwNxAljKgsnSIqI1O1x28EYcgGsPH8OC1aGYDAeIaOoESkagPJasJPVmPcJwhDNoC5dwcJFboB3AXgCFVPWD5HVkek60n16FedpQNgeXTehJut8Yz8AJxHPhpdH61hIVyRFW9jePbkAADB2Gva8s9aJuMgsBw1WEjDIlrLJiXJ7Dc31zDT4h7AxAVL0N+6V8ARHQQZ60KJ0rKJ0rRCPq6RFc4ThmwAxWnJo93QminoiHsE4xsOFqshTasIbbu9LuIL2+OIfRhyAGwcMQHXY7/AZCgBRzwgAI7AhLftQhuX7gEBafA7DbDBMOQAQDWJQrv6HeXmwUXQt1psaOMyrHV7dEbVBlmvyNOnA4BlG7VuG+phK6rQDfYIao/3TGjr9nzzGo9KfK9BrnXwvJgmSIhmHjavj+QbBeuAoGY8j0dcWPP6fPs+QCvaWZvu8yZzeJOKsjI2ArA+dASvrCiwfX9OwBAoFaWys8lk+vY2ncw6tBQg3RrbCOD60BHoEAHDGQnHSZViTlC0a0UKljLtHXF/FqphEo5AEUtPY8WzYs0zkk4OJUY7KvcHrodJJ0Qsp2Q8ZEdKqCvmLbT/qFb2GFI8JeMJFjIJl+SlkgwA03p8tv1gRUUSMkEpl+KLv1In32omAdBssffIEfsrTymhXJ9TNfrFbMiUjbwqxAUgqPkGD64K8ABQUcVsfjmfY8tLUuOJOAD2cj7giP0BHV3Ohwsa3STbcNNgBACuoJG2kP9HFjTikk63QlG7QXMOoZJOgq7GknT6RK3HabDCC2EA/q+o1S/r9UDI1smWyuVOA3gPWa9f2OzdCLHM5rULnpH8kVBi+It3ETYHSLu9n1yVy12yq/1OaXeAuP0YA8Nnv/9WcXuQvB8HEVPeL8WT99sDDkoq/Ik24JBS4g44BI94xE1GblyKP+JxasglCQAp0ZDL6TGfuAASj/l8/KDTx496fYJht48f9/sEA4/JRz7/eq/lEw69vvfs7weP/X6CwefPMPr9'
            b'GYbf33/8/z+WaX62sYUE9QAAAABJRU5ErkJggg=='
        )
        icon = QImage.fromData(icon_bytes)
        icon_pixmap = QPixmap.fromImage(icon)
        self.setWindowIcon(QIcon(icon_pixmap))

        QApplication.setStyle(QStyleFactory.create('Fusion'))
        self.setWindowTitle("GUI")
        self.show()

    def closeEvent(self, event):
        """Quit the Qt application when the window is closed, ending app.exec_()."""
        QApplication.quit()


# Re-running this cell must not create a second QApplication, so reuse the
# existing instance when there is one.
app = QApplication.instance()
if app is None:
    app = QApplication([])

# Create the window (it shows itself in initUI) and block in the Qt event loop.
window = MainWindow()
app.exec_()

In [None]:
# Duplicates the import in the notebook's import cell; kept so this cell still
# works when only the standard imports have been run.
from bs4 import BeautifulSoup

# Page to scrape. TODO: fill in before running; an empty string cannot be
# turned into a request (urllib rejects it with "unknown url type").
url = ''

soup = None
if url:
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection; the timeout keeps an
    # unresponsive server from hanging the kernel.
    with urllib.request.urlopen(req, timeout=30) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
else:
    print("Set `url` before running this cell.")