update to convert notebooks and python files

sdpython · Apr 20, 2016 · fa7cd14 · fa7cd14
1 parent a5892a3
commit fa7cd14
Show file tree

Hide file tree

Showing 4 changed files with 146 additions and 55 deletions.
diff --git a/src/ensae_teaching_cs/automation_students/projects_helper.py b/src/ensae_teaching_cs/automation_students/projects_helper.py
@@ -122,7 +122,6 @@ def extract_students_mails_from_gmail_and_stores_in_folders(folder=".", filemail
     fLOG("### nb groups", len(proj.Groups))
 
     # gathers mails
-
     email_render = EmailMessageRenderer(
         tmpl=template_email_html_short, fLOG=fLOG)
     render = EmailMessageListRenderer(title=title,
@@ -136,14 +135,17 @@ def extract_students_mails_from_gmail_and_stores_in_folders(folder=".", filemail
     box.logout()
 
     # cleaning files
-
     for group in proj.Groups:
         files = list(proj.enumerate_group_files(group))
         att = [_ for _ in files if ".html" in _]
         if len(att) <= 1:
             fLOG("### remove ", group)
             proj.remove_group(group)
 
+    # unzip files and convert notebooks
+    for group in proj.Groups:
+        proj.unzip_convert(group)
+
     fLOG("### summary ")
     summary = os.path.join(folder, "index.html")
     if os.path.exists(summary):

diff --git a/src/ensae_teaching_cs/automation_students/projects_repository.py b/src/ensae_teaching_cs/automation_students/projects_repository.py
@@ -5,14 +5,17 @@
 import re
 import os
 import numpy
+import warnings
 from urllib.parse import urlparse
-from pyquickhelper.loghelper import noLOG
+from pyquickhelper.loghelper import noLOG, unzip_files
 from pyquickhelper.texthelper import remove_diacritics
 from pyquickhelper.filehelper import remove_folder, explore_folder_iterfile
 from pyquickhelper.filehelper import zip_files
+from pyquickhelper.helpgen import nb2html
 from pymmails import EmailMessageRenderer, EmailMessage
 from .repository_exception import RegexRepositoryException, TooManyProjectsException
 from ..td_1a import edit_distance
+from ..homeblog.python_exemple_py_to_html import py_to_html_file
 
 
 class ProjectsRepository:
@@ -380,18 +383,9 @@ def match_mails(names, emails, threshold=3, exc=True, skip_names=None):
         return res, skip
 
     @staticmethod
-    def create_folders_from_dataframe(df,
-                                      root,
-                                      report="suivi.rst",
-                                      col_student="Eleves",
-                                      col_group="Groupe",
-                                      col_subject="Sujet",
-                                      col_mail=None,
-                                      overwrite=False,
-                                      email_function=None,
-                                      must_have_email=True,
-                                      skip_if_nomail=False,
-                                      skip_names=None,
+    def create_folders_from_dataframe(df, root, report="suivi.rst", col_student="Eleves", col_group="Groupe",
+                                      col_subject="Sujet", col_mail=None, overwrite=False, email_function=None,
+                                      must_have_email=True, skip_if_nomail=False, skip_names=None,
                                       fLOG=noLOG):
         """
         creates a series of folders for groups of students
@@ -440,9 +434,11 @@ def local_email_function_column(names, skip_names, mapping):
             if col_mail is None:
                 local_function = local_email_function
             else:
+                ind_student = list(df.columns).index(col_student) + 1
+                ind_mail = list(df.columns).index(col_mail) + 1
                 mapping = {}
                 for row in df.itertuples():
-                    mapping[getattr(row, col_student)] = getattr(row, col_mail)
+                    mapping[row[ind_student]] = row[ind_mail]
                 local_function = lambda names, skip, mapping=mapping: local_email_function_column(
                     names, skip_names, mapping)
         else:
@@ -502,15 +498,15 @@ def ul(last):
                 mails, skip = local_function(eleves, skip_names)
                 if must_have_email and (not skip and len(mails) == 0):
                     # we skip only if a group has no mails at all
-                    if isinstance(email_function, list):
-                        raise ProjectsRepository.MailNotFound("unable to find a mail for\n{0}\nname={1}\nskip:{4}\n{5}\namong\n{3}\nGROUP\n{2}".format(
+                    if isinstance(email_function, (list, set)):
+                        raise ProjectsRepository.MailNotFound("unable to find a mail for\n{0}\nname={1}\nskip:{4}\n{5}\namong\n{3}\nGROUP\n{2}\nlocal_function: {6}".format(
                             "; ".join("'%s'" % _ for _ in eleves),
                             name, group, "\n".join(email_function),
-                            skip, skip_names))
+                            skip, skip_names, local_function))
                     else:
                         raise ProjectsRepository.MailNotFound(
-                            "unable to find a mail for {0}\nname={1}\n with function\n{3}\nGROUP\n{2}".format(
-                                " ;".join(eleves), name, group, email_function))
+                            "unable to find a mail for {0}\nname={1}\n with function\n{3}\nGROUP\n{2}\nTYPE:\n{4}".format(
+                                " ;".join(eleves), name, group, email_function, type(email_function)))
                 if skip_if_nomail and (not skip and len(mails) == 0):
                     fLOG("ProjectsRepository.create_folders_from_dataframe [skipping {0}]".format(
                         "; ".join(eleves)))
@@ -753,6 +749,13 @@ def write_summary(self, render=None, link="index_mails.html",
                     {% endfor %}
                     </ul>
                 {% endif %}
+                {% if len(ps["created_files"]) > 0 %}
+                    <ul>
+                    {% for name, relpath, size in ps["created_files"] %}
+                        <li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>
+                    {% endfor %}
+                    </ul>
+                {% endif %}
                 </li>
             {% endfor %}
             </ol>
@@ -810,13 +813,17 @@ def format_size(s):
             atts = []
             emails = []
             links = []
+            created_files = []
             for name in self.enumerate_group_files(group):
                 if name.endswith(".metadata"):
                     continue
                 loc = self.get_group_location(group)
                 nb_files += 1
-                size += os.stat(os.path.join(loc, name)).st_size
-                if os.path.split(name)[0].endswith("attachments"):
+                tn = os.path.join(loc, name)
+                size += os.stat(tn).st_size
+                folder = os.path.split(name)[0]
+                splf = folder.replace("\\", "/").split("/")
+                if folder.endswith("attachments"):
                     meta = name + ".metadata"
                     if os.path.exists(meta):
                         data = EmailMessage.read_metadata(meta)
@@ -826,6 +833,13 @@ def format_size(s):
                         day = ""
                     atts.append((day, os.path.relpath(
                         name, self._location), data))
+                elif "attachments" in splf:
+                    rel = os.path.relpath(name, loc)
+                    dest = os.path.relpath(name, self._location)
+                    if rel == dest:
+                        raise Exception("weird\n{0}\n{1}".format(rel, dest))
+                    ssize = format_size(os.stat(name).st_size)
+                    created_files.append((rel, dest, ssize))
                 else:
                     mail = os.path.split(name)[-1]
                     res = EmailMessage.interpret_default_filename(mail)
@@ -860,13 +874,9 @@ def format_size(s):
 
             # we create the variable for the template
             emails = [_[-1] for _ in sorted(emails)]
-            c = dict(link=c[0].replace("\\", "/"),
-                     group=c[1],
-                     nb=nb_files,
-                     size=size,
-                     attachments=atts,
-                     emails=emails,
-                     links=links)
+            c = dict(link=c[0].replace("\\", "/"), group=c[1], nb=nb_files,
+                     size=size, attachments=atts, emails=emails, links=links,
+                     created_files=created_files)
 
             groups.append(c)
 
@@ -899,6 +909,13 @@ def format_size(s):
                             {% endfor %}
                             </ul>
                         {% endif %}
+                        {% if len(ps["created_files"]) > 0 %}
+                            <ul>
+                            {% for name, relpath, size in ps["created_files"] %}
+                                <li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>
+                            {% endfor %}
+                            </ul>
+                        {% endif %}
                         </li>
                     {% endfor %}
                     </ol>
@@ -916,3 +933,75 @@ def format_size(s):
         if dof:
             render.flush()
         return res
+
+    def unzip_convert(self, group):
+        """
+        unzip files and convert notebooks into html
+
+        @param          group       group name
+        @return                     list of new files
+        """
+        self.unzip_files(group)
+        return self.convert_files(group)
+
+    def unzip_files(self, group):
+        """
+        unzip the zip files found among the attachments of a group
+
+        @param          group       group name
+        @return                     list of new files
+        """
+        names = list(self.enumerate_group_files(group))
+        files = []
+        for name in names:
+            if "attachments" not in name:
+                continue
+            ext = os.path.splitext(name)[-1]
+            if ext == ".zip":
+                folder = os.path.splitext(name)[0] + "_zip"
+                folder = folder.replace(" ", "_").replace(",", "_")
+                if not os.path.exists(folder):
+                    self.fLOG(
+                        "ProjectsRepository.unzip_files [unzip {0}]".format(name))
+                    self.fLOG(
+                        "ProjectsRepository.unzip_files [creating {0}]".format(folder))
+                    os.mkdir(folder)
+                    l = unzip_files(name, folder, fLOG=self.fLOG)
+                    files.extend(l)
+                else:
+                    # already done, we do not do it again
+                    pass
+        return files
+
+    def convert_files(self, group):
+        """
+        convert notebooks and python files into html
+
+        @param          group       group name
+        @return                     list of new files
+        """
+        names = list(self.enumerate_group_files(group))
+        files = []
+        for name in names:
+            if "attachments" not in name:
+                continue
+            ext = os.path.splitext(name)[-1]
+            if ext == ".ipynb":
+                self.fLOG(
+                    "ProjectsRepository.convert_files [convert {0}]".format(name))
+                out = name + ".html"
+                nb2html(name, out)
+                files.append(out)
+            elif ext == ".py":
+                self.fLOG(
+                    "ProjectsRepository.convert_files [convert {0}]".format(name))
+                out = name + ".html"
+                try:
+                    py_to_html_file(name, out, False, title=os.path.relpath(
+                        name, self.get_group_location(group)))
+                    files.append(out)
+                except Exception as e:
+                    # the syntax of the python file might be wrong
+                    warnings.warn(
+                        "unable to convert File \"{0}\"".format(name))
+        return files
diff --git a/src/ensae_teaching_cs/homeblog/py2html.py b/src/ensae_teaching_cs/homeblog/py2html.py
@@ -197,7 +197,7 @@ def | <font color="blue"> | </font>
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 <html>
 <head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1 (Latin-1)" >
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" >
 <title>%s</title>
 <style type="text/css">
     h1 {    color: green;
@@ -425,7 +425,7 @@ def replaceCodes(text=""):
     return text
 
 
-def file2HTML(file_name, format, style, Replace, entity="1"):
+def file2HTML(file_name, format, style, Replace, entity="1", encoding="utf-8"):
     """Reads a file and returns the contents as a string,
     highlighted with HTML styles. This function uses the
     output of the tokenize module to decide what to colour.
@@ -440,26 +440,26 @@ def file2HTML(file_name, format, style, Replace, entity="1"):
     if file_name == "<stdin>":
         file_name = "temp_stdin.py2html.tmp"
         lines = sys.stdin.readlines()
-        with open(file_name, "w") as f:
+        with open(file_name, "w", encoding=encoding) as f:
             f.writelines(lines)
         removeFile = file_name
     elif len(file_name) < 1000 and os.path.exists(file_name):
         try:
             # , encoding="utf8").readlines() #copy all lines into lines list
-            lines = open(file_name, 'r').readlines()
+            with open(file_name, 'r', encoding=encoding) as f:
+                lines = f.readlines()
         except UnicodeDecodeError as e:
             print("issue with file ", file_name)
             raise e
     else:
         lines = file_name.split("\n")
         file_name = "temp_py2html.tmp"
-        f = open(file_name, "w")
-        f.writelines("\n".join(lines))
-        f.close()
+        with open(file_name, "w", encoding=encoding) as f:
+            f.writelines("\n".join(lines))
         removeFile = file_name
 
     lines = ['', ] + lines
-    tempPointer = open(file_name, 'r')
+    tempPointer = open(file_name, 'r', encoding=encoding)
     read_line = tempPointer.readline  # , encoding="utf8").readline
     # use tokenize to interate through tokens
     tok = tokenize.generate_tokens(read_line)

diff --git a/src/ensae_teaching_cs/homeblog/python_exemple_py_to_html.py b/src/ensae_teaching_cs/homeblog/python_exemple_py_to_html.py
@@ -119,7 +119,7 @@ def py_to_html_folder(folder, addGoogleTracking=True):
     return res
 
 
-def py_to_html_file(file, writehtml="", addGoogleTracking=True):
+def py_to_html_file(file, writehtml="", addGoogleTracking=True, title=None):
     """
     convert a python script into a html file
 
@@ -138,38 +138,38 @@ def py_to_html_file(file, writehtml="", addGoogleTracking=True):
     racine, ext = os.path.splitext(file)
 
     try:
-        tf = open(file, "r")
-        content = tf.read()
-        tf.close()
+        with open(file, "r", encoding="utf-8") as tf:
+            content = tf.read()
+        encoding = "utf-8"
     except UnicodeDecodeError as e:
         try:
-            tf = open(file, "r", encoding="latin1")
-            content = tf.read()
-            tf.close()
+            with open(file, "r", encoding="latin-1") as tf:
+                content = tf.read()
+            encoding = "utf-8"
         except UnicodeDecodeError:
-            tf = open(file, "r", encoding="utf8")
-            content = tf.read()
-            tf.close()
+            with open(file, "r", encoding="utf-8", errors="utf-8") as tf:
+                content = tf.read()
+            encoding = "utf-8"
 
     content = cleanFileFromtohtmlreplace(content)
 
     appliedstyle = readStyleFile(None)
     try:
-        data = file2HTML(content, "0", appliedstyle, False, "1")
+        data = file2HTML(content, "0", appliedstyle,
+                         False, "1", encoding=encoding)
         block = makeBlock(data)
         page = py_page.replace(googleTrackerMarker, googlet)
-        html = page % (f, f, block, py2html__version__)
+        html = page % (title or f, title or f, block, py2html__version__)
     except Exception as e:
-        fLOG("not python file, running it again ", file, " error ", e)
-        raise e
+        raise Exception(
+            "not python file, running it again {0}".format(file)) from e
 
     if len(writehtml) > 0:
         outfile = writehtml
     else:
         outfile = racine + ".html"
 
-    file = open(outfile, "w")  # , encoding="utf8")
-    file.write(html)
-    file.close()
+    with open(outfile, "w", encoding=encoding) as f:
+        f.write(html)
 
     return outfile