add function to run selenium, glue images

sdpython · May 4, 2016 · da69718 · da69718
1 parent 33caed7
commit da69718
Show file tree

Hide file tree

Showing 11 changed files with 366 additions and 16 deletions.
diff --git a/_unittests/ut_faq/test_faq_web.py b/_unittests/ut_faq/test_faq_web.py
@@ -0,0 +1,89 @@
+"""
+@brief      test log(time=4s)
+"""
+
+import sys
+import os
+import unittest
+
+
+try:
+    import src
+except ImportError:
+    path = os.path.normpath(
+        os.path.abspath(
+            os.path.join(
+                os.path.split(__file__)[0],
+                "..",
+                "..")))
+    if path not in sys.path:
+        sys.path.append(path)
+    import src
+
+try:
+    import pyquickhelper as skip_
+except ImportError:
+    path = os.path.normpath(
+        os.path.abspath(
+            os.path.join(
+                os.path.split(__file__)[0],
+                "..",
+                "..",
+                "..",
+                "pyquickhelper",
+                "src")))
+    if path not in sys.path:
+        sys.path.append(path)
+    import pyquickhelper as skip_
+
+from pyquickhelper.loghelper import fLOG
+from pyquickhelper.pycode import get_temp_folder
+from src.ensae_teaching_cs.faq.faq_web import webshot, webhtml
+
+
+class TestFaqWeb(unittest.TestCase):
+
+    def _test_selenium_html(self):
+        fLOG(
+            __file__,
+            self._testMethodName,
+            OutputPrint=__name__ == "__main__")
+
+        url = "http://www.xavierdupre.fr"
+        html = webhtml(url)
+        assert len(html) > 0
+        self.assertEqual(len(html[0]), 2)
+        if "href" not in html[0][1]:
+            raise Exception(html)
+
+        html = webhtml(url, module='splinter')
+        assert len(html) > 0
+        self.assertEqual(len(html[0]), 2)
+        if "href" not in html[0][1]:
+            raise Exception(html)
+
+    def test_selenium_image(self):
+        fLOG(
+            __file__,
+            self._testMethodName,
+            OutputPrint=__name__ == "__main__")
+
+        temp = get_temp_folder(__file__, "temp_selenium_image")
+        img = os.path.join(temp, "image_selenium.png")
+        url = "http://www.xavierdupre.fr"
+        res = webshot(img, url)
+        assert os.path.exists(img)
+        fLOG(res)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(len(res[0]), 2)
+
+        img = os.path.join(temp, "image_splinter.png")
+        res = webshot(img, url, module='splinter')
+        img = res[0][1]
+        assert os.path.exists(img)
+        fLOG(res)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(len(res[0]), 2)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/_unittests/ut_helpers/data/image_selenium.png b/_unittests/ut_helpers/data/image_selenium.png
diff --git a/_unittests/ut_helpers/data/image_splinter.png b/_unittests/ut_helpers/data/image_splinter.png
diff --git a/_unittests/ut_helpers/test_image_helper.py b/_unittests/ut_helpers/test_image_helper.py
@@ -0,0 +1,63 @@
+"""
+@brief      test log(time=10s)
+
+"""
+import os
+import sys
+import unittest
+
+
+try:
+    import src
+    import pyquickhelper as skip_
+except ImportError:
+    path = os.path.normpath(
+        os.path.abspath(
+            os.path.join(
+                os.path.split(__file__)[0],
+                "..",
+                "..")))
+    if path not in sys.path:
+        sys.path.append(path)
+    path = os.path.normpath(
+        os.path.abspath(
+            os.path.join(
+                os.path.split(__file__)[0],
+                "..",
+                "..",
+                "..",
+                "pyquickhelper",
+                "src")))
+    if path not in sys.path:
+        sys.path.append(path)
+    import src
+    import pyquickhelper as skip_
+
+
+from pyquickhelper.loghelper import fLOG
+from pyquickhelper.pycode import get_temp_folder
+from src.ensae_teaching_cs.helpers.image_helper import collate_images
+
+
+class TestImageHelper(unittest.TestCase):
+
+    def test_collate_imgae(self):
+        fLOG(
+            __file__,
+            self._testMethodName,
+            OutputPrint=__name__ == "__main__")
+
+        temp = get_temp_folder(__file__, "temp_image_helper")
+        img = os.path.join(temp, "..", "data")
+        imgs = os.listdir(img)
+        png = [os.path.join(img, _)
+               for _ in imgs if os.path.splitext(_)[-1] == ".png" and "00" not in _]
+        assert len(png) > 0
+        out = os.path.join(temp, "out_collate.png")
+        im = collate_images(png, out)
+        assert os.path.exists(out)
+        assert im is not None
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/_unittests/ut_helpers/test_video.py → _unittests/ut_helpers/test_video_helper.py b/_unittests/ut_helpers/test_video.py → _unittests/ut_helpers/test_video_helper.py
@@ -39,7 +39,7 @@
 from src.ensae_teaching_cs.helpers.video_helper import make_video
 
 
-class TestVideo(unittest.TestCase):
+class TestVideoHelper(unittest.TestCase):
 
     def test_make_video(self):
         fLOG(
@@ -51,7 +51,7 @@ def test_make_video(self):
         img = os.path.join(temp, "..", "data")
         imgs = os.listdir(img)
         png = [os.path.join(img, _)
-               for _ in imgs if os.path.splitext(_)[-1] == ".png"]
+               for _ in imgs if os.path.splitext(_)[-1] == ".png" and "00" in _]
         assert len(png) > 0
         out = os.path.join(temp, "out_video.avi")
         v = make_video(png, out, size=(1000, 300))

diff --git a/_unittests/ut_module/test_flake8.py b/_unittests/ut_module/test_flake8.py
@@ -47,7 +47,8 @@ def test_flake8_src(self):
         src_ = os.path.normpath(os.path.join(thi, "..", "..", "src"))
         check_pep8(src_, fLOG=fLOG, extended=[("fLOG", _extended_refactoring)],
                    ignore=('E501', 'E265', 'E731'),
-                   neg_filter="((.*pandas_helper.*)|(.*faq_python.*)|(.*send_feedback.*))")
+                   skip=["skip_' imported but unused"],
+                   neg_filter="((.*pandas_helper.*)|(.*faq_python.*)|(.*send_feedback.*)|(.*python_exemple_py_to_html.*))")
 
     def test_flake8_test(self):
         fLOG(
@@ -64,13 +65,13 @@ def test_flake8_test(self):
         thi = os.path.abspath(os.path.dirname(__file__))
         test = os.path.normpath(os.path.join(thi, "..", ))
         check_pep8(test, fLOG=fLOG, neg_filter="temp_.*",
-                   skip=["'src' imported but unused",
-                         "'skip_' imported but unused",
-                         "'skip__' imported but unused",
-                         "'skip___' imported but unused",
-                         "'skip____' imported but unused",
-                         "'skip_____' imported but unused",
-                         "'skip______' imported but unused",
+                   skip=["src' imported but unused",
+                         "skip_' imported but unused",
+                         "skip__' imported but unused",
+                         "skip___' imported but unused",
+                         "skip____' imported but unused",
+                         "skip_____' imported but unused",
+                         "skip______' imported but unused",
                          ],
                    extended=[("fLOG", _extended_refactoring)],
                    max_line_length=320)

diff --git a/appveyor.yml b/appveyor.yml
@@ -8,10 +8,6 @@ environment:
 
   matrix:
 
-    # Pre-installed Python versions, which Appveyor may upgrade to
-    # a later point release.
-    # See: http://www.appveyor.com/docs/installed-software#python
-
     - PYTHON: "C:\\Python35-x64"
       PYTHON_VERSION: "3.5.x"
       PYTHON_ARCH: "64"

diff --git a/src/ensae_teaching_cs/faq/faq_web.py b/src/ensae_teaching_cs/faq/faq_web.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+"""
+@file
+@brief A few functions about scraping
+
+"""
+import os
+import datetime
+
+
+def webshot(img, url, navigator="firefox", add_date=False,
+            module="selenium", size=None):
+    """
+    Uses the module `selenium <http://selenium-python.readthedocs.io/>`_ to take a screenshot of a website
+    (or the module `splinter <http://splinter.readthedocs.io/en/latest/>`_ - does not work with IE).
+    The function was only tested with Firefox.
+    If url and img are lists, the function goes through all the urls and saves one webshot per url.
+
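+    @param      img             image name or list of image names (one per url)
+    @param      url             url or list of urls
+    @param      navigator       firefox, chrome, edge, (ie: does not work well)
+    @param      add_date        add the current date and time to the image filename
+    @param      module          module to use (selenium or splinter); None is meant to select the first one available, but the code checks *navigator* for None, not *module*
+    @param      size            size of the browser window, passed to ``set_window_size`` before taking the webshot (if not None)
+    @return                     list of tuples (url, image name)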
+
+    Check the list of available webdrivers at
+    `selenium/webdriver <https://github.com/SeleniumHQ/selenium/tree/master/py/selenium/webdriver>`_
+    and add one to the code if needed.
+    """
+    if navigator is None:
+        try:
+            import selenium as skip_
+            module = "selenium"
+        except ImportError:
+            module = "splinter"
+
+    res = []
+    if module == "selenium":
+        from selenium import webdriver
+
+        if navigator == "firefox":
+            browser = webdriver.Firefox()
+        elif navigator == "chrome":
+            browser = webdriver.Chrome()
+        elif navigator == "ie":
+            browser = webdriver.Ie()
+        elif navigator == "edge":
+            browser = webdriver.Edge()
+        else:
+            raise Exception("unable to interpret the navigator")
+
+        if size is not None:
+            browser.set_window_size(size[0], size[1])
+
+        if not isinstance(url, list):
+            url = [url]
+        if not isinstance(img, list):
+            img = [img]
+        if len(url) != len(img):
+            raise Exception("different number of urls and images")
+        for u, i in zip(url, img):
+            browser.get(u)
+            if add_date:
+                dt = datetime.datetime.now()
+                a, b = os.path.splitext(i)
+                i = "{0}.{1}{2}".format(a, str(dt).replace(
+                    ":", "-").replace("/", "-"), b)
+            browser.get_screenshot_as_file(i)
+            res.append((u, i))
+        browser.quit()
+
+    elif module == "splinter":
+
+        from splinter import Browser
+
+        with Browser(navigator) as browser:
+            if size is not None:
+                browser.driver.set_window_size(size[0], size[1])
+
+            if not isinstance(url, list):
+                url = [url]
+            if not isinstance(img, list):
+                img = [img]
+            if len(url) != len(img):
+                raise Exception("different number of urls and images")
+            for u, i in zip(url, img):
+                browser.visit(u)
+                if add_date:
+                    dt = datetime.datetime.now()
+                    a, b = os.path.splitext(i)
+                    i = "{0}.{1}{2}".format(a, str(dt).replace(
+                        ":", "-").replace("/", "-"), b)
+                g = browser.screenshot(os.path.abspath(i))
+                res.append((u, g))
+    else:
+        raise ImportError("unknown module required '{0}'".format(module))
+
+    return res
+
+
+def webhtml(url, navigator="firefox", module="selenium"):
+    """
+    Uses the module `selenium <http://selenium-python.readthedocs.io/>`_ to retrieve the html of a website
+    (or the module `splinter <http://splinter.readthedocs.io/en/latest/>`_ - does not work with IE).
+    The function was only tested with Firefox.
+
+    @param      url             url or list of urls
+    @param      navigator       firefox, chrome, edge, (ie: does not work well)
+    @param      module          module to use (selenium or splinter); None is meant to select the first one available, but the code checks *navigator* for None, not *module*
+    @return                     list of tuples (url, html)
+
+    Check the list of available webdrivers at
+    `selenium/webdriver <https://github.com/SeleniumHQ/selenium/tree/master/py/selenium/webdriver>`_
+    and add one to the code if needed.
+    """
+    if navigator is None:
+        try:
+            import selenium as skip_
+            module = "selenium"
+        except ImportError:
+            module = "splinter"
+
+    res = []
+    if module == "selenium":
+        from selenium import webdriver
+
+        if navigator == "firefox":
+            browser = webdriver.Firefox()
+        elif navigator == "chrome":
+            browser = webdriver.Chrome()
+        elif navigator == "ie":
+            browser = webdriver.Ie()
+        elif navigator == "edge":
+            browser = webdriver.Edge()
+        else:
+            raise Exception("unable to interpret the navigator")
+
+        if not isinstance(url, list):
+            url = [url]
+        for u in url:
+            browser.get(u)
+            i = browser.page_source
+            res.append((u, i))
+        browser.quit()
+
+    elif module == "splinter":
+
+        from splinter import Browser
+
+        with Browser(navigator) as browser:
+            if not isinstance(url, list):
+                url = [url]
+            for u in url:
+                browser.visit(u)
+                i = browser.html
+                res.append((u, i))
+    else:
+        raise ImportError("unknown module required '{0}'".format(module))
+
+    return res