Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
add function to run selenium, glue images
Browse files Browse the repository at this point in the history
  • Loading branch information
sdpython committed May 4, 2016
1 parent 33caed7 commit da69718
Show file tree
Hide file tree
Showing 11 changed files with 366 additions and 16 deletions.
89 changes: 89 additions & 0 deletions _unittests/ut_faq/test_faq_web.py
@@ -0,0 +1,89 @@
"""
@brief test log(time=4s)
"""

import sys
import os
import unittest


try:
import src
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..")))
if path not in sys.path:
sys.path.append(path)
import src

try:
import pyquickhelper as skip_
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..",
"..",
"pyquickhelper",
"src")))
if path not in sys.path:
sys.path.append(path)
import pyquickhelper as skip_

from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import get_temp_folder
from src.ensae_teaching_cs.faq.faq_web import webshot, webhtml


class TestFaqWeb(unittest.TestCase):

def _test_selenium_html(self):
fLOG(
__file__,
self._testMethodName,
OutputPrint=__name__ == "__main__")

url = "http://www.xavierdupre.fr"
html = webhtml(url)
assert len(html) > 0
self.assertEqual(len(html[0]), 2)
if "href" not in html[0][1]:
raise Exception(html)

html = webhtml(url, module='splinter')
assert len(html) > 0
self.assertEqual(len(html[0]), 2)
if "href" not in html[0][1]:
raise Exception(html)

def test_selenium_image(self):
fLOG(
__file__,
self._testMethodName,
OutputPrint=__name__ == "__main__")

temp = get_temp_folder(__file__, "temp_selenium_image")
img = os.path.join(temp, "image_selenium.png")
url = "http://www.xavierdupre.fr"
res = webshot(img, url)
assert os.path.exists(img)
fLOG(res)
self.assertEqual(len(res), 1)
self.assertEqual(len(res[0]), 2)

img = os.path.join(temp, "image_splinter.png")
res = webshot(img, url, module='splinter')
img = res[0][1]
assert os.path.exists(img)
fLOG(res)
self.assertEqual(len(res), 1)
self.assertEqual(len(res[0]), 2)

# Run the tests of this file when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Binary file added _unittests/ut_helpers/data/image_selenium.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _unittests/ut_helpers/data/image_splinter.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
63 changes: 63 additions & 0 deletions _unittests/ut_helpers/test_image_helper.py
@@ -0,0 +1,63 @@
"""
@brief test log(time=10s)
"""
import os
import sys
import unittest


try:
import src
import pyquickhelper as skip_
except ImportError:
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..")))
if path not in sys.path:
sys.path.append(path)
path = os.path.normpath(
os.path.abspath(
os.path.join(
os.path.split(__file__)[0],
"..",
"..",
"..",
"pyquickhelper",
"src")))
if path not in sys.path:
sys.path.append(path)
import src
import pyquickhelper as skip_


from pyquickhelper.loghelper import fLOG
from pyquickhelper.pycode import get_temp_folder
from src.ensae_teaching_cs.helpers.image_helper import collate_images


class TestImageHelper(unittest.TestCase):

def test_collate_imgae(self):
fLOG(
__file__,
self._testMethodName,
OutputPrint=__name__ == "__main__")

temp = get_temp_folder(__file__, "temp_image_helper")
img = os.path.join(temp, "..", "data")
imgs = os.listdir(img)
png = [os.path.join(img, _)
for _ in imgs if os.path.splitext(_)[-1] == ".png" and "00" not in _]
assert len(png) > 0
out = os.path.join(temp, "out_collate.png")
im = collate_images(png, out)
assert os.path.exists(out)
assert im is not None


# Run the tests of this file when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Expand Up @@ -39,7 +39,7 @@
from src.ensae_teaching_cs.helpers.video_helper import make_video


class TestVideo(unittest.TestCase):
class TestVideoHelper(unittest.TestCase):

def test_make_video(self):
fLOG(
Expand All @@ -51,7 +51,7 @@ def test_make_video(self):
img = os.path.join(temp, "..", "data")
imgs = os.listdir(img)
png = [os.path.join(img, _)
for _ in imgs if os.path.splitext(_)[-1] == ".png"]
for _ in imgs if os.path.splitext(_)[-1] == ".png" and "00" in _]
assert len(png) > 0
out = os.path.join(temp, "out_video.avi")
v = make_video(png, out, size=(1000, 300))
Expand Down
17 changes: 9 additions & 8 deletions _unittests/ut_module/test_flake8.py
Expand Up @@ -47,7 +47,8 @@ def test_flake8_src(self):
src_ = os.path.normpath(os.path.join(thi, "..", "..", "src"))
check_pep8(src_, fLOG=fLOG, extended=[("fLOG", _extended_refactoring)],
ignore=('E501', 'E265', 'E731'),
neg_filter="((.*pandas_helper.*)|(.*faq_python.*)|(.*send_feedback.*))")
skip=["skip_' imported but unused"],
neg_filter="((.*pandas_helper.*)|(.*faq_python.*)|(.*send_feedback.*)|(.*python_exemple_py_to_html.*))")

def test_flake8_test(self):
fLOG(
Expand All @@ -64,13 +65,13 @@ def test_flake8_test(self):
thi = os.path.abspath(os.path.dirname(__file__))
test = os.path.normpath(os.path.join(thi, "..", ))
check_pep8(test, fLOG=fLOG, neg_filter="temp_.*",
skip=["'src' imported but unused",
"'skip_' imported but unused",
"'skip__' imported but unused",
"'skip___' imported but unused",
"'skip____' imported but unused",
"'skip_____' imported but unused",
"'skip______' imported but unused",
skip=["src' imported but unused",
"skip_' imported but unused",
"skip__' imported but unused",
"skip___' imported but unused",
"skip____' imported but unused",
"skip_____' imported but unused",
"skip______' imported but unused",
],
extended=[("fLOG", _extended_refactoring)],
max_line_length=320)
Expand Down
4 changes: 0 additions & 4 deletions appveyor.yml
Expand Up @@ -8,10 +8,6 @@ environment:

matrix:

# Pre-installed Python versions, which Appveyor may upgrade to
# a later point release.
# See: http://www.appveyor.com/docs/installed-software#python

- PYTHON: "C:\\Python35-x64"
PYTHON_VERSION: "3.5.x"
PYTHON_ARCH: "64"
Expand Down
161 changes: 161 additions & 0 deletions src/ensae_teaching_cs/faq/faq_web.py
@@ -0,0 +1,161 @@
# -*- coding: utf-8 -*-
"""
@file
@brief A few functions about scraping
"""
import os
import datetime


def _webshot_dated_name(name):
    """Insert the current timestamp between the stem and extension of *name*."""
    stem, ext = os.path.splitext(name)
    stamp = str(datetime.datetime.now()).replace(":", "-").replace("/", "-")
    return "{0}.{1}{2}".format(stem, stamp, ext)


def webshot(img, url, navigator="firefox", add_date=False,
            module="selenium", size=None):
    """
    Uses the module `selenium <http://selenium-python.readthedocs.io/>`_ to take a picture of a website
    (or the module `splinter <http://splinter.readthedocs.io/en/latest/>`_ - does not work with IE).
    The function was only tested with Firefox.
    If url and img are lists, the function goes through all the urls and saves one webshot per url.

    @param      img             image name or list of image names (one per url)
    @param      url             url or list of urls
    @param      navigator       firefox, chrome, edge, (ie: does not work well)
    @param      add_date        insert the current date and time in the image filename
    @param      module          module to use (selenium or splinter), None picks
                                selenium if it can be imported and splinter otherwise
    @param      size            if not None, ``size[0]`` and ``size[1]`` are given to
                                the browser's ``set_window_size`` before the webshots are taken
    @return                     list of [ ( url, image name) ]; with splinter the image name
                                is the value returned by ``browser.screenshot``, which may
                                differ from the requested name

    Raises ``ImportError`` if *module* is unknown and ``Exception`` if *url* and *img*
    do not have the same length or if *navigator* is not supported by the selenium branch.
    Both checks on *module* and on the lengths happen before any browser is started,
    and no browser is started at all when there is nothing to capture.

    Check the list of available webdriver at
    `selenium/webdriver <https://github.com/SeleniumHQ/selenium/tree/master/py/selenium/webdriver>`_
    and add one to the code if needed.
    """
    if module is None:
        # Documented behaviour: keep the first module available.
        try:
            import selenium as skip_
            module = "selenium"
        except ImportError:
            module = "splinter"
    if module not in ("selenium", "splinter"):
        raise ImportError("unknown module required '{0}'".format(module))

    urls = url if isinstance(url, list) else [url]
    imgs = img if isinstance(img, list) else [img]
    if len(urls) != len(imgs):
        raise Exception("different number of urls and images")
    if not urls:
        return []

    res = []
    if module == "selenium":
        from selenium import webdriver

        # Driver classes are looked up by name so that a driver missing from
        # the installed selenium only matters when it is actually requested.
        drivers = {"firefox": "Firefox", "chrome": "Chrome",
                   "ie": "Ie", "edge": "Edge"}
        if navigator not in drivers:
            raise Exception("unable to interpret the navigator")
        browser = getattr(webdriver, drivers[navigator])()

        try:
            if size is not None:
                browser.set_window_size(size[0], size[1])
            for u, i in zip(urls, imgs):
                browser.get(u)
                if add_date:
                    i = _webshot_dated_name(i)
                browser.get_screenshot_as_file(i)
                res.append((u, i))
        finally:
            # Always close the browser, even when a page or a screenshot fails.
            browser.quit()
    else:
        from splinter import Browser

        with Browser(navigator) as browser:
            if size is not None:
                browser.driver.set_window_size(size[0], size[1])
            for u, i in zip(urls, imgs):
                browser.visit(u)
                if add_date:
                    i = _webshot_dated_name(i)
                g = browser.screenshot(os.path.abspath(i))
                res.append((u, g))

    return res


def webhtml(url, navigator="firefox", module="selenium"):
    """
    Uses the module `selenium <http://selenium-python.readthedocs.io/>`_ to retrieve the html of a website
    (or the module `splinter <http://splinter.readthedocs.io/en/latest/>`_ - does not work with IE).
    The function was only tested with Firefox.

    @param      url             url or list of urls
    @param      navigator       firefox, chrome, edge, (ie: does not work well)
    @param      module          module to use (selenium or splinter), None picks
                                selenium if it can be imported and splinter otherwise
    @return                     list of [ ( url, html) ]

    Raises ``ImportError`` if *module* is unknown and ``Exception`` if *navigator*
    is not supported by the selenium branch. The check on *module* happens before
    any browser is started, and no browser is started when *url* is an empty list.

    Check the list of available webdriver at
    `selenium/webdriver <https://github.com/SeleniumHQ/selenium/tree/master/py/selenium/webdriver>`_
    and add one to the code if needed.
    """
    if module is None:
        # Documented behaviour: keep the first module available.
        try:
            import selenium as skip_
            module = "selenium"
        except ImportError:
            module = "splinter"
    if module not in ("selenium", "splinter"):
        raise ImportError("unknown module required '{0}'".format(module))

    urls = url if isinstance(url, list) else [url]
    if not urls:
        return []

    res = []
    if module == "selenium":
        from selenium import webdriver

        # Driver classes are looked up by name so that a driver missing from
        # the installed selenium only matters when it is actually requested.
        drivers = {"firefox": "Firefox", "chrome": "Chrome",
                   "ie": "Ie", "edge": "Edge"}
        if navigator not in drivers:
            raise Exception("unable to interpret the navigator")
        browser = getattr(webdriver, drivers[navigator])()

        try:
            for u in urls:
                browser.get(u)
                res.append((u, browser.page_source))
        finally:
            # Always close the browser, even when a page fails to load.
            browser.quit()
    else:
        from splinter import Browser

        with Browser(navigator) as browser:
            for u in urls:
                browser.visit(u)
                res.append((u, browser.html))

    return res

0 comments on commit da69718

Please sign in to comment.