## Take the screenshot of the specified URLs.
+ Saves a full screen screenshot as a png image file.
+ The browser is not visually displayed because chromedriver is running in headless mode to capture the full screen.
+ Since Chrome in headless mode does not support BASIC authentication, this tool does not support BASIC authentication either.
    + "The team owning Headless Chrome has decided not to support extensions."
    +  https://bugs.chromium.org/p/chromedriver/issues/detail?id=2342
    +  https://bugs.chromium.org/p/chromium/issues/detail?id=706008

### Usage
+ Enter the target URL (and ID) in a CSV file in the following format. The file will be read and the screenshot will be acquired.
    + Format
        + ID,URL
            + Only ASCII characters can be specified for ID (no multibyte characters)
            + [Reason] OpenCV, which is used in the next step ("Image Difference with OpenCV"), does not accept multibyte characters in file names.
    + Write one URL per line.
    + Save the file in UTF-8 encoding.
    + URL lists are assumed to be placed directly under the "./url-lists" folder.
    + Interactively select the name of the URL list (CSV file) under "./url-lists" to load.
+ Save the screenshot as "ID.png".
    + Location of the saved PNG images:
        + The 'screenshot-YYYYMMDD-hhmmss' directory (where YYYYMMDD-hhmmss is the start date and time) directly under the directory where this program is run.
+ By default, processes are created for the number of CPU cores in the execution environment and processed in parallel. To adjust, change the "num_processors" variable.
+ The results of the execution (pairs of URL and PNG image file name) are output as a CSV file named "results-YYYYMMDD-HHMMSS.csv" (the YYYYMMDD-HHMMSS part matches that of the directory name where the images are saved).

In [1]:
import time
import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
import zipfile
import os
import csv
from pathlib import Path

import multiprocessing as mp

from ipywidgets import interact, widgets
import re

import requests

# main

### Interactively select CSV file name to load.

In [None]:
# Folder (relative to the working directory) that holds the URL-list CSV files.
DIR_URL_LISTS = 'url-lists/'

# Offer only files whose extension is ".csv" (case-insensitive).
# A plain "name contains csv" match would also list files such as "csv-notes.txt".
files = [f for f in os.listdir(DIR_URL_LISTS) if f.lower().endswith('.csv')]

# Text widget used as a holder for the current selection; read it via URL_LISTS_FILE.value.
URL_LISTS_FILE = widgets.Text()

def url_csv_filename(file):
    """Store the file name chosen through interact() in URL_LISTS_FILE.

    Args:
        file: One of the names in `files`, i.e. a CSV file inside DIR_URL_LISTS.
    """
    URL_LISTS_FILE.value = file

# interact() builds a selection widget from the list and calls
# url_csv_filename with the chosen entry.
interact(url_csv_filename, file=files)

In [None]:
# Echo the current selection; the f-prefix is required so the placeholder is
# replaced by the selected file name instead of being printed literally.
print(f'> The selected file is "{URL_LISTS_FILE.value}"')

### calls worker

In [2]:
# Line-range filter applied while reading the URL list (lines are counted from 1).
#   skipUntil: 1 -> skip the 1st line, 200 -> skip the first 200 lines, 0 -> skip nothing.
#   stopAt:    2 -> stop after reading 2 lines, 200 -> stop after reading 200 lines,
#              -1 -> read all lines.
skipUntil, stopAt = 0, -1

In [None]:
#####
# * note
# When you re-run the program after editing module files,
# you should restart the Jupyter kernel, especially after editing settings.py.
#####

from mymodules import settings
from mymodules import workers

# Multiprocessing in Python on Windows and Jupyter/Ipython — Making it work
# https://medium.com/@grvsinghal/speed-up-your-python-code-using-multiprocessing-on-windows-and-jupyter-or-ipython-2714b49d6fac



if __name__ == '__main__':
    # Driver cell:
    #   1. read (ID, URL) pairs from the selected CSV file, honouring skipUntil / stopAt,
    #   2. hand every pair to workers.take_screenshot in a pool of worker processes,
    #   3. append the returned [url, image_filename] pairs to 'results-<start datetime>.csv'.

    try:
        num_processors = mp.cpu_count()
        # num_processors = 1

        index = 0
        arr_url_lists = []

        obj_path_to_input_file = Path(DIR_URL_LISTS) / URL_LISTS_FILE.value
        # "utf-8_sig" also accepts files that start with a BOM.
        with open(obj_path_to_input_file, newline='', encoding="utf-8_sig") as f:
            reader = csv.reader(f)
            for index, row in enumerate(reader, start=1):
                # Blank lines come back as [] from csv.reader; they have no ID to read.
                if index > skipUntil and row:
                    file_id = row[0]
                    # A URL may itself contain commas, so re-join everything after the ID.
                    url = ",".join(row[1:])
                    print(f'url in writer() : {file_id} - {url}')
                    arr_url_lists.append([file_id, url])

                # Checked for every line (skipped or not) so that stopAt also
                # takes effect when it is not greater than skipUntil.
                if stopAt > 0 and index >= stopAt:
                    break

        time_start = time.time()

        str_start_datetime = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        print(f'Start at {str_start_datetime}')

        p = mp.Pool(processes = num_processors, initializer = workers.ts_init, initargs= [str_start_datetime])
        try:
            arr_result = p.starmap(workers.take_screenshot, arr_url_lists)
            # Normal path: no more tasks, let the workers finish and exit.
            p.close()
        except BaseException:
            # Failure or interrupt: stop the workers instead of waiting for queued tasks.
            p.terminate()
            raise
        finally:
            # Wait for the worker processes so none are left behind.
            p.join()

        time_end = time.time()
        time_diff = time_end - time_start
        str_end_datetime = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

        print(f'time : {time_diff}')

        with open('results-' + str_start_datetime + '.csv', 'a',encoding="utf-8_sig") as result_file:
            # csv.writer quotes every field and escapes embedded double quotes,
            # which hand-built '"...","..."' lines do not.
            result_writer = csv.writer(result_file, quoting=csv.QUOTE_ALL, lineterminator='\n')
            for ar in arr_result:
                # ar's shape is [url, image_filename]
                result_writer.writerow([str(ar[0]), str(ar[1])])
        print("done")

    except Exception as ex:
        # Show a short summary in the cell output, then let the traceback surface.
        print(f'{type(ex).__name__}: {ex}')
        raise