This repository has been archived by the owner on Jan 13, 2024. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add function to run selenium, glue images
- Loading branch information
Showing
11 changed files
with
366 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
""" | ||
@brief test log(time=4s) | ||
""" | ||
|
||
import sys | ||
import os | ||
import unittest | ||
|
||
|
||
try: | ||
import src | ||
except ImportError: | ||
path = os.path.normpath( | ||
os.path.abspath( | ||
os.path.join( | ||
os.path.split(__file__)[0], | ||
"..", | ||
".."))) | ||
if path not in sys.path: | ||
sys.path.append(path) | ||
import src | ||
|
||
try: | ||
import pyquickhelper as skip_ | ||
except ImportError: | ||
path = os.path.normpath( | ||
os.path.abspath( | ||
os.path.join( | ||
os.path.split(__file__)[0], | ||
"..", | ||
"..", | ||
"..", | ||
"pyquickhelper", | ||
"src"))) | ||
if path not in sys.path: | ||
sys.path.append(path) | ||
import pyquickhelper as skip_ | ||
|
||
from pyquickhelper.loghelper import fLOG | ||
from pyquickhelper.pycode import get_temp_folder | ||
from src.ensae_teaching_cs.faq.faq_web import webshot, webhtml | ||
|
||
|
||
class TestFaqWeb(unittest.TestCase):
    """
    Exercises ``webshot`` and ``webhtml`` with both backends
    (selenium and splinter) against ``http://www.xavierdupre.fr``.

    NOTE(review): these tests presumably need a working browser driver
    and network access -- confirm before enabling them on a CI machine.
    """

    def _check_html_result(self, html):
        """
        Checks the structure returned by ``webhtml``:
        a non-empty list of pairs whose second item contains ``href``.

        @param      html        value returned by ``webhtml``
        """
        self.assertGreater(len(html), 0)
        self.assertEqual(len(html[0]), 2)
        # Passing the whole result as the message keeps the diagnostic
        # the original ``raise Exception(html)`` provided.
        self.assertIn("href", html[0][1], html)

    def _test_selenium_html(self):
        # The leading underscore keeps unittest from collecting this
        # method (the default test prefix is ``test``).
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        url = "http://www.xavierdupre.fr"
        self._check_html_result(webhtml(url))
        self._check_html_result(webhtml(url, module='splinter'))

    def test_selenium_image(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        temp = get_temp_folder(__file__, "temp_selenium_image")
        url = "http://www.xavierdupre.fr"

        # selenium backend: the screenshot is written to the requested path
        img = os.path.join(temp, "image_selenium.png")
        res = webshot(img, url)
        self.assertTrue(os.path.exists(img))
        fLOG(res)
        self.assertEqual(len(res), 1)
        self.assertEqual(len(res[0]), 2)

        # splinter backend: the file to look for is the name returned
        # by webshot, not the name which was requested
        img = os.path.join(temp, "image_splinter.png")
        res = webshot(img, url, module='splinter')
        img = res[0][1]
        self.assertTrue(os.path.exists(img))
        fLOG(res)
        self.assertEqual(len(res), 1)
        self.assertEqual(len(res[0]), 2)
|
||
if __name__ == "__main__": | ||
unittest.main() |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
""" | ||
@brief test log(time=10s) | ||
""" | ||
import os | ||
import sys | ||
import unittest | ||
|
||
|
||
try: | ||
import src | ||
import pyquickhelper as skip_ | ||
except ImportError: | ||
path = os.path.normpath( | ||
os.path.abspath( | ||
os.path.join( | ||
os.path.split(__file__)[0], | ||
"..", | ||
".."))) | ||
if path not in sys.path: | ||
sys.path.append(path) | ||
path = os.path.normpath( | ||
os.path.abspath( | ||
os.path.join( | ||
os.path.split(__file__)[0], | ||
"..", | ||
"..", | ||
"..", | ||
"pyquickhelper", | ||
"src"))) | ||
if path not in sys.path: | ||
sys.path.append(path) | ||
import src | ||
import pyquickhelper as skip_ | ||
|
||
|
||
from pyquickhelper.loghelper import fLOG | ||
from pyquickhelper.pycode import get_temp_folder | ||
from src.ensae_teaching_cs.helpers.image_helper import collate_images | ||
|
||
|
||
class TestImageHelper(unittest.TestCase):
    """
    Checks ``collate_images`` on the PNG files stored in the ``data``
    folder located next to the temporary folder of this test.
    """

    def test_collate_image(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        temp = get_temp_folder(__file__, "temp_image_helper")
        data = os.path.join(temp, "..", "data")
        # os.listdir returns names in arbitrary order: sorting makes the
        # list given to collate_images identical from one run to another.
        # Files whose name contains "00" are left out.
        png = [os.path.join(data, name)
               for name in sorted(os.listdir(data))
               if os.path.splitext(name)[-1] == ".png" and "00" not in name]
        self.assertGreater(len(png), 0)

        out = os.path.join(temp, "out_collate.png")
        im = collate_images(png, out)
        self.assertTrue(os.path.exists(out))
        self.assertIsNotNone(im)
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
@file | ||
@brief A few functions about web scraping | ||
""" | ||
import os | ||
import datetime | ||
|
||
|
||
def webshot(img, url, navigator="firefox", add_date=False,
            module="selenium", size=None):
    """
    Uses the module `selenium <http://selenium-python.readthedocs.io/>`_
    to take a picture of a website (or the module
    `splinter <http://splinter.readthedocs.io/en/latest/>`_ - does not work with IE).
    The function was only tested with Firefox.
    If url and img are lists, the function goes through all the urls
    and saves one webshot per url.

    @param      img             image name or list of image names
    @param      url             url or list of urls (as many as images)
    @param      navigator       firefox, chrome, edge, (ie: does not work well)
    @param      add_date        add the current date and time to the image filename,
                                just before the extension
    @param      module          module to use (selenium or splinter or None
                                to keep the first one available)
    @param      size            to resize the browser window (if not None),
                                two values passed to ``set_window_size``
    @return                     list of [ ( url, image name) ], with splinter the
                                image name is the value returned by ``browser.screenshot``
    @raise      ImportError     if *module* is not selenium, splinter or None
    @raise      Exception       if *url* and *img* do not have the same length or
                                if *navigator* is unknown (selenium only)

    Check the list of available webdriver at
    `selenium/webdriver <https://github.com/SeleniumHQ/selenium/tree/master/py/selenium/webdriver>`_
    and add one to the code if needed.
    """
    # The automatic choice is driven by *module* (not *navigator*).
    if module is None:
        try:
            import selenium as skip_
            module = "selenium"
        except ImportError:
            module = "splinter"

    if module not in ("selenium", "splinter"):
        raise ImportError("unknown module required '{0}'".format(module))

    # Arguments are validated before any browser is started so that
    # a wrong call does not leave a browser process behind.
    urls = url if isinstance(url, list) else [url]
    imgs = img if isinstance(img, list) else [img]
    if len(urls) != len(imgs):
        raise Exception("different number of urls and images")

    def final_name(name):
        # Inserts the current date between the name and its extension.
        if not add_date:
            return name
        dt = datetime.datetime.now()
        root, ext = os.path.splitext(name)
        stamp = str(dt).replace(":", "-").replace("/", "-")
        return "{0}.{1}{2}".format(root, stamp, ext)

    res = []
    if module == "selenium":
        from selenium import webdriver

        # navigator name -> name of the webdriver class
        drivers = {"firefox": "Firefox", "chrome": "Chrome",
                   "ie": "Ie", "edge": "Edge"}
        if navigator not in drivers:
            raise Exception("unable to interpret the navigator")
        browser = getattr(webdriver, drivers[navigator])()

        # try/finally: the browser is closed even if a page fails to load.
        try:
            if size is not None:
                browser.set_window_size(size[0], size[1])
            for u, i in zip(urls, imgs):
                browser.get(u)
                i = final_name(i)
                browser.get_screenshot_as_file(i)
                res.append((u, i))
        finally:
            browser.quit()
    else:
        from splinter import Browser

        with Browser(navigator) as browser:
            if size is not None:
                browser.driver.set_window_size(size[0], size[1])
            for u, i in zip(urls, imgs):
                browser.visit(u)
                i = final_name(i)
                # the returned name is stored, not the requested one
                g = browser.screenshot(os.path.abspath(i))
                res.append((u, g))

    return res
|
||
|
||
def webhtml(url, navigator="firefox", module="selenium"):
    """
    Uses the module `selenium <http://selenium-python.readthedocs.io/>`_
    to retrieve the html of a website (or the module
    `splinter <http://splinter.readthedocs.io/en/latest/>`_ - does not work with IE).
    The function was only tested with Firefox.

    @param      url             url or list of urls
    @param      navigator       firefox, chrome, edge, (ie: does not work well)
    @param      module          module to use (selenium or splinter or None
                                to keep the first one available)
    @return                     list of [ ( url, html) ]
    @raise      ImportError     if *module* is not selenium, splinter or None
    @raise      Exception       if *navigator* is unknown (selenium only)

    Check the list of available webdriver at
    `selenium/webdriver <https://github.com/SeleniumHQ/selenium/tree/master/py/selenium/webdriver>`_
    and add one to the code if needed.
    """
    # The automatic choice is driven by *module* (not *navigator*).
    if module is None:
        try:
            import selenium as skip_
            module = "selenium"
        except ImportError:
            module = "splinter"

    # Checked first so that a wrong module name never starts a browser.
    if module not in ("selenium", "splinter"):
        raise ImportError("unknown module required '{0}'".format(module))

    urls = url if isinstance(url, list) else [url]

    res = []
    if module == "selenium":
        from selenium import webdriver

        # navigator name -> name of the webdriver class
        drivers = {"firefox": "Firefox", "chrome": "Chrome",
                   "ie": "Ie", "edge": "Edge"}
        if navigator not in drivers:
            raise Exception("unable to interpret the navigator")
        browser = getattr(webdriver, drivers[navigator])()

        # try/finally: the browser is closed even if a page fails to load.
        try:
            for u in urls:
                browser.get(u)
                res.append((u, browser.page_source))
        finally:
            browser.quit()
    else:
        from splinter import Browser

        with Browser(navigator) as browser:
            for u in urls:
                browser.visit(u)
                res.append((u, browser.html))

    return res
Oops, something went wrong.