Hi, I am the **Report tool** 👋 <br>
First press **Run all** under the **Runtime** tab or **Ctrl+F9**.
##### The first run will be a little slower, because I have to install some libraries.<br> After the first run I am ready to be used, but if you need to run me again, just press ▶️ under the **UI** title.
When my UI pops up, just feed me the name of the report and the images you need to use, and I'll populate a table where you can edit the description for each image. After that, you just have to ask me to generate the report and, once it is ready, I'll publish a link for the download. Easy! 😁<br>
For 🐛 bugs or 💬 feedback, **report them using GitHub issues**.<br/>
For ❓ Questions **Charlie Banks or Mirco Bianchini** will support you. 🧐


### Load the libraries and set up some global variables.

In [1]:
#@title
%%capture 
!pip install jupyter-dash
!pip install Flask
!pip install python-docx
!pip install python-pptx
!pip install pandas

import fnmatch
import itertools
import os
import base64
import dash
import pandas as pd
import copy
import math
from abc import ABC, abstractmethod

from jupyter_dash import JupyterDash
from dash.dependencies import Input, Output, State
from dash import dcc, html, dash_table
from docx import Document
from docx.enum.text import WD_BREAK
from docx.shared import Inches
from pptx import Presentation
from pptx.util import Cm, Pt
from pptx.enum.shapes import MSO_SHAPE_TYPE
from collections import namedtuple
from urllib.parse import quote as urlquote
from flask import Flask, send_from_directory

In [2]:
#@title
# in here we keep the global variables
# Root folder for everything the tool stores, plus one sub-folder per kind
# of file (uploaded images, generated reports, uploaded templates).
UPLOAD_DIRECTORY  = "/content/sample_data/uploadedReport"
UPLOAD_IMAGES = "/content/sample_data/uploadedReport/Images"
UPLOAD_REPORTS = "/content/sample_data/uploadedReport/Reports"
UPLOAD_TEMPLATE = "/content/sample_data/uploadedReport/Template"

# Create each folder independently: a root folder that already exists must
# not prevent a missing sub-folder from being created.
for _folder in (UPLOAD_DIRECTORY, UPLOAD_IMAGES, UPLOAD_REPORTS, UPLOAD_TEMPLATE):
    os.makedirs(_folder, exist_ok=True)

### Core general methods

In [3]:
#@title
# this is where we keep the core definitions used.

def save_file(name, content, directory):
    """Decode and store a file uploaded with Plotly Dash.

    Args:
        name: File name reported by the upload component. Only its final
            path component is used, so a name such as ``../x`` cannot write
            outside ``directory``.
        content: Upload payload as text; everything after the ``;base64,``
            marker is decoded and written.
        directory: Existing folder the decoded file is written into.

    Raises:
        ValueError: If ``name`` has no usable file-name component.
        IndexError: If ``content`` does not contain the ``;base64,`` marker.
    """
    # The name comes from the client, so strip any directory part.
    safe_name = os.path.basename(name)
    if not safe_name:
        raise ValueError(f"invalid upload file name: {name!r}")

    data = content.encode("utf8").split(b";base64,")[1]
    with open(os.path.join(directory, safe_name), "wb") as fp:
        fp.write(base64.decodebytes(data))

def uploaded_files(directory:str):
    """Return the regular files found directly inside ``directory``.

    Each entry is an ``uploaded_file`` named tuple with ``name`` (the file
    name) and ``dir`` (the file name joined to ``directory``). Sub-folders
    are skipped and the list is sorted by file name.
    """
    uploaded_file = namedtuple('uploaded_file', 'name dir')
    candidates = (
        uploaded_file(name=entry, dir=os.path.join(directory, entry))
        for entry in os.listdir(directory)
    )
    return sorted(
        (item for item in candidates if os.path.isfile(item.dir)),
        key=lambda item: item.name,
    )

def file_download_link(filename):
    """Build the Dash ``A`` element that points at a report's download URL.

    The link text is the raw file name; the target is the URL-quoted file
    name under ``/download/Reports/``.
    """
    href = "/download/Reports/" + urlquote(filename)
    return html.A(filename, href=href)

def path_report(filename:str):
    """Return ``filename`` joined to the reports folder (``UPLOAD_REPORTS``)."""
    return os.path.join(UPLOAD_REPORTS, filename)

def bump_version(file_report):
  """Return the next versioned report name (without extension).

  ``name.ext`` becomes ``name-version01``; ``name-versionNN.ext`` becomes
  ``name-version(NN+1)``, zero-padded to at least two digits.

  Only a trailing ``-version<digits>`` suffix counts as a version, so names
  that merely contain the word "version" (e.g. ``conversion``) or that
  contain other hyphens (e.g. ``my-doc-version01``) are handled correctly.
  """
  file_name = os.path.splitext(file_report)[0]
  # Split on the LAST "-version" so hyphens inside the base name are kept.
  base_name, marker, version = file_name.rpartition('-version')
  if marker and version.isdigit():
    return "{}-version{:02d}".format(base_name, int(version) + 1)
  return "{0}-version01".format(file_name)

def get_layout(pres, layout_name):
  """Return the slide layout of ``pres`` whose name equals ``layout_name``.

  Raises ``ValueError`` when no layout has that name.
  """
  layouts = pres.slide_layouts
  names = [candidate.name for candidate in layouts]
  position = names.index(layout_name)
  return layouts[position]

In [4]:
#@title
class Report_tool(ABC):
  """Common state shared by every report generator.

  Attributes set by ``__init__``:
    source_images: text used as caption prefix / image source label.
    location_report_uploaded: full path of the report file to write.
    data_dict: the table rows as a list of per-row dictionaries.
    files: the uploaded images found in ``UPLOAD_IMAGES``.
  """

  def __init__(self, report_name:str, extension_file:str, source_images:str, data_frame:pd.DataFrame):
    """Collect everything a concrete generator needs to build its report."""
    report_file = f'{report_name}.{extension_file}'
    self.source_images = source_images
    self.location_report_uploaded = path_report(report_file)
    self.data_dict = data_frame.to_dict(orient='records')
    self.files = uploaded_files(UPLOAD_IMAGES)

  @abstractmethod
  def generate_report(self):
    """Build and save the report; implemented by each concrete class."""

def report_factory(is_template_uploaded:bool, report_type:str, report_name:str, image_source:str, data_frame:pd.DataFrame):
  """Return the report generator matching the requested format.

  Args:
    is_template_uploaded: truthy when the user uploaded a template.
    report_type: ``'doc'`` for a Word report; any other value yields a
      PowerPoint report.
    report_name: name of the report file, without extension.
    image_source: text passed to the generator as ``source_images``.
    data_frame: table rows (picture names and descriptions).

  A template is used only when a stored template's file extension starts
  with ``report_type`` (``docx`` for ``doc``, ``pptx`` for ``ppt``). When no
  stored template matches, the default generator for that format is
  returned instead of failing with an ``IndexError``.
  """
  is_doc = report_type == 'doc'

  if is_template_uploaded:
    # Match on the extension rather than anywhere in the file name, so that
    # e.g. "doc_layout.pptx" is not picked up as a Word template.
    templates = [
        template for template in uploaded_files(UPLOAD_TEMPLATE)
        if os.path.splitext(template.name)[1].lower().lstrip('.').startswith(report_type)
    ]
    if templates:
      template_path = templates[0].dir
      if is_doc:
        return ReportDocByTemplate(template_path, report_name, image_source, data_frame)
      return ReportPPTByTemplate(template_path, report_name, image_source, data_frame)

  if is_doc:
    return DefaultReportDoc(report_name, image_source, data_frame)
  return DefaultReportPPT(report_name, image_source, data_frame)

### Core methods for PowerPoint

In [5]:
#@title
# Methods to create report in power point
class ReportPPTByTemplate(Report_tool):
  """PowerPoint report built from the first slide of an uploaded template.

  The group shapes on the template's first slide act as placeholders: each
  group receives one uploaded image. Inside a group, text boxes are replaced
  by the image caption and every other shape by the picture itself. All
  non-group shapes are copied unchanged onto every generated slide.
  """

  def __init__(self, preso_directory:str, report_name, source_images, data_frame):
    """Remember the template path and initialise the shared report state."""
    self._preso_directory = preso_directory
    super().__init__(report_name, 'pptx', source_images, data_frame)

  def generate_report(self):
    """Create the report using a template.

    Returns:
      ``'report generated'`` on success, or an error message when the
      template has no slide or no group shape on its first slide.
    """
    template = Presentation(self._preso_directory)
    if len(template.slides) == 0:
      return 'Error the template has no slide'

    source = template.slides[0]
    template_layout = get_layout(template, source.slide_layout.name)

    # Divide the shapes in two groups: placeholders and shapes to copy as-is.
    shape_groups = []
    shapes_to_copy = []
    for shape in source.shapes:
      if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
        shape_groups.append(shape)
      else:
        shapes_to_copy.append(shape)

    # Without placeholders there is nothing to fill (and the slide count
    # below would divide by zero).
    if not shape_groups:
      return 'Error the template has no grouped shapes on its first slide'

    # Count how many slides have to be created.
    number_of_slides = math.ceil(len(self.files) / len(shape_groups))

    group_count = 0
    for _ in range(number_of_slides):
      # Copied slide, with only the non-group elements.
      copied_slide = template.slides.add_slide(template_layout)
      for shape in shapes_to_copy:
        newel = copy.deepcopy(shape.element)
        copied_slide.shapes._spTree.insert_element_before(newel, 'p:extLst')

      for group in shape_groups:
        if group_count >= len(self.files):
          break
        self._fill_group(copied_slide, group, group_count)
        # This handles only one picture per group.
        group_count += 1

    # Remove the first slide (the template slide itself).
    xml_slides = template.slides._sldIdLst
    xml_slides.remove(list(xml_slides)[0])

    template.save(self.location_report_uploaded)
    return 'report generated'

  def _fill_group(self, slide, group, index):
    """Render image ``index`` on ``slide`` at the positions given by ``group``."""
    group_chOff = group._element.xpath("./p:grpSpPr/a:xfrm/a:chOff")[0]
    group_off = group._element.xpath("./p:grpSpPr/a:xfrm/a:off")[0]
    for shape in group.shapes:
      # Combine the shape offset with the group offset, minus the group's
      # child offset, to position the new shape on the slide.
      shape_off = shape._element.xpath("./p:spPr/a:xfrm/a:off")[0]
      x_loc = shape_off.x + group_off.x - group_chOff.x
      y_loc = shape_off.y + group_off.y - group_chOff.y
      if shape.shape_type == MSO_SHAPE_TYPE.TEXT_BOX:
        # Reuse the font of the template's first run when there is one.
        template_runs = shape.text_frame.paragraphs[0].runs
        template_font = template_runs[0].font if template_runs else None
        font_size = None if template_font is None else template_font.size
        font_name = None if template_font is None else template_font.name

        txBox = slide.shapes.add_textbox(x_loc, y_loc, shape.width, shape.height)
        tf = txBox.text_frame
        tf.word_wrap = True
        run = tf.paragraphs[0].add_run()
        run.text = self.source_images + '-' + self.data_dict[index]['Description']
        run.font.size = Pt(12) if font_size is None else font_size
        run.font.name = 'Arial' if font_name is None else font_name
      else:
        slide.shapes.add_picture(self.files[index].dir, x_loc, y_loc, shape.width, shape.height)

class DefaultReportPPT(Report_tool):
  """PowerPoint report with a fixed layout: two captioned images per slide."""

  def __init__(self, report_name, source_images, data_frame):
    """Initialise the shared report state for a ``pptx`` file."""
    super().__init__(report_name, 'pptx', source_images, data_frame)

  def generate_report(self):
    """Create the report as power-point.

    Every slide gets a title block, a paragraph block and up to two
    captioned pictures. With an odd number of images the last slide holds a
    single picture.

    Returns:
      ``'report generated'`` once the file is saved.
    """
    preso = Presentation()

    # Slide size. For reference, in EMU: default width 9144000,
    # height 6858000 at 4:3 and 5143500 at 16:9.
    preso.slide_width = Cm(33.858)
    preso.slide_height = Cm(19.05)

    blank_slide_layout = preso.slide_layouts[6]
    # Left edge of the first and of the second picture on a slide.
    picture_lefts = (Cm(8.28), Cm(20.42))

    for first_index in range(0, len(self.files), 2):
      slide = preso.slides.add_slide(blank_slide_layout)
      self._add_title(slide)
      self._add_paragraph(slide)

      for offset, left in enumerate(picture_lefts):
        index = first_index + offset
        if index >= len(self.files):
          # Odd number of images: the second slot stays empty.
          break
        self._add_captioned_picture(slide, index, left)

    preso.save(self.location_report_uploaded)
    return 'report generated'

  def _add_title(self, slide):
    """Add the title and sub-title text box to ``slide``."""
    # https://python-pptx.readthedocs.io/en/latest/user/text.html
    # https://python-pptx.readthedocs.io/en/latest/dev/analysis/txt-autofit-text.html
    txBox = slide.shapes.add_textbox(Cm(1.46), Cm(1.46), Cm(30), Cm(1.28))
    tf = txBox.text_frame
    run = tf.paragraphs[0].add_run()
    run.text = "Page Title"
    run.font.bold = True
    run.font.size = Pt(28)
    p = tf.add_paragraph()
    p.text = "Sub-Title of the page."
    p.font.size = Pt(22)

  def _add_paragraph(self, slide):
    """Add the free-text paragraph box on the left of ``slide``."""
    txBox = slide.shapes.add_textbox(Cm(1.46), Cm(4.73), Cm(6.81), Cm(12.85))
    tf = txBox.text_frame
    tf.word_wrap = True
    run = tf.paragraphs[0].add_run()
    run.text = "Paragrapher where you can write all the information you want."
    run.font.size = Pt(16)

  def _add_captioned_picture(self, slide, index, left):
    """Add image ``index`` at ``left`` with its caption right below it."""
    top = Cm(4.73)
    height = Cm(12.85)
    width = Cm(12.06)
    slide.shapes.add_picture(self.files[index].dir, left, top, height=height, width=width)
    txBox = slide.shapes.add_textbox(left, top + height, width, Cm(1.28))
    tf = txBox.text_frame
    tf.word_wrap = True
    run = tf.paragraphs[0].add_run()
    run.text = self.source_images + '-' + self.data_dict[index]['Description']
    run.font.size = Pt(14)

### Core methods for Word

In [6]:
#@title
# Methods to create report in doc
class ReportDocByTemplate(Report_tool):
  """Word report built from an uploaded template containing one table.

  The first row of the table is the template row. For every image a cell is
  filled by mirroring the paragraphs of the template cell in the same
  column: a paragraph whose text contains "description" is replaced by the
  caption and the picture (picture first when the text also contains
  "bottom"); any other paragraph text is copied as-is.
  """

  def __init__(self, preso_directory:str, report_name, source_images, data_frame):
    """Remember the template path and initialise the shared report state."""
    self._preso_directory = preso_directory
    super().__init__(report_name, 'docx', source_images, data_frame)

  def generate_report(self):
    """Create the report using the doc template.

    Returns:
      ``'report generated'`` on success, or an error message when the
      template does not contain exactly one table.
    """
    document = Document(self._preso_directory)

    table_count = len(document.tables)
    if table_count == 0:
      return 'Error the template has no table'
    if table_count != 1:
      return 'Error the template has more than one table'

    table = document.tables[0]
    columns_count = len(table.columns)
    number_of_row = math.ceil(len(self.files) / columns_count)

    data_index = 0
    # Row 0 is the template row, so generated rows start at index 1.
    for row_index in range(1, number_of_row + 1):
      table.add_row()
      for column_index in range(columns_count):
        # The last row may have fewer images than columns.
        if data_index < len(self.files):
          self._fill_cell(table.cell(0, column_index), table.cell(row_index, column_index), data_index)
        data_index += 1

    # Drop the template row now that every cell has been generated from it.
    table._tbl.remove(table.rows[0]._tr)

    document.save(self.location_report_uploaded)
    return 'report generated'

  def _fill_cell(self, template_cell, cell, data_index):
    """Fill ``cell`` for image ``data_index``, mirroring ``template_cell``."""
    cell_text = self.source_images + '-' + self.data_dict[data_index]['Description']
    picture_path = self.files[data_index].dir

    for position, temp_par in enumerate(template_cell.paragraphs):
      if position > 0:
        cell.add_paragraph('')
      cell_run = cell.paragraphs[position].add_run()

      marker = temp_par.text.lower()
      if 'description' not in marker:
        cell_run.text = temp_par.text
      elif 'bottom' in marker:
        # Caption at the bottom: picture first, then the text.
        cell_run.add_picture(picture_path, width=template_cell.width)
        cell_run.add_break()
        cell_run.add_text(cell_text)
      else:
        cell_run.text = cell_text
        cell_run.add_break()
        cell_run.add_picture(picture_path, width=template_cell.width)

      # Copy the formatting of the template paragraph's first run; an empty
      # template paragraph has no run, so fall back to the defaults.
      temp_runs = temp_par.runs
      temp_run = temp_runs[0] if temp_runs else None
      font_name = None if temp_run is None else temp_run.font.name
      font_size = None if temp_run is None else temp_run.font.size
      cell_run.font.name = 'Arial' if font_name is None else font_name
      cell_run.font.size = Pt(12) if font_size is None else font_size
      cell_run.bold = None if temp_run is None else temp_run.bold

class DefaultReportDoc(Report_tool):
  """Word report with a fixed layout: a cover text and a two-column table."""

  def __init__(self, report_name, source_images, data_frame):
    """Initialise the shared report state for a ``docx`` file."""
    super().__init__(report_name, 'docx', source_images, data_frame)

  def generate_report(self):
    """Create the report as doc.

    Images are placed left to right, two per table row. Each cell holds
    ``<picture name>:<description>``, the picture and the image source text.

    Returns:
      ``'report generated'`` once the file is saved.
    """
    document = Document()

    # Cover page; some of this information could be extracted as an input.
    document.add_heading('Document Title', 0)

    p = document.add_paragraph('A plain paragraph having some ')
    p.add_run('bold').bold = True
    p.add_run(' and some ')
    p.add_run('italic.').italic = True

    # Create the table with just enough rows for two images per row, so no
    # empty rows trail the table.
    # The number of columns could be extracted as an input.
    col_count = 2
    row_count = math.ceil(len(self.files) / col_count)
    table = document.add_table(rows=row_count, cols=col_count)

    for index, uploaded_file in enumerate(self.files):
      row_index, col_index = divmod(index, col_count)
      cell = table.cell(row_index, col_index)

      # Use the description only when the table row belongs to this image.
      picture_name = self.data_dict[index]['PictureName']
      description = self.data_dict[index]['Description'] if picture_name == uploaded_file.name else 'Description not found'
      cell.text = f'{picture_name}:{description}'

      run = cell.paragraphs[0].add_run()
      run.add_picture(uploaded_file.dir, width=Inches(2.5))
      run.add_break()
      run.add_text(self.source_images)

    document.save(self.location_report_uploaded)
    return 'report generated'


### UI backend logic

In [7]:
#@title
# https://docs.faculty.ai/user-guide/apps/examples/dash_file_upload_download.html
# Flask server handed to the Dash app below; it also serves the downloads.
server = Flask(__name__)
@server.route("/download/<path:path>")
def download(path):
    """Serve a file from the upload directory.

    ``path`` is the part of the URL after ``/download/``; it is looked up
    under ``UPLOAD_DIRECTORY`` and the file is sent as an attachment.
    """
    return send_from_directory(UPLOAD_DIRECTORY, path, as_attachment=True)

# we can use another style.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# Dash application running on the Flask server defined above.
app = JupyterDash(server=server, external_stylesheets=external_stylesheets)
# Page layout, top to bottom: report name / image source inputs, report
# format toggle, image and template upload areas, the editable table,
# the action buttons and the list of download links.
app.layout = html.Div([
    html.H5("I need some information"),
    # TODO: this could be made better in terms of layout structure.
    html.I("The name of the report.", style={'marginRight':'55px'}),
    html.I("Image source.", style={'marginRight':'55px'}),

    html.Br(),

    dcc.Input(id="report-name", type='text', value='myDoc', style={'marginRight':'10px'}),
    dcc.Input(id="image-resource", type='text', value='MottMacdonald', style={'marginRight':'10px'}),
    
    html.Br(),
    html.Br(),
    html.Label('Report format'),
    dcc.RadioItems(
        id='report-format',
        options=[
            {'label': 'doc', 'value': 'doc'},
            {'label': 'ppt', 'value': 'ppt'}
        ],
        value='doc',
        labelStyle={'display': 'inline-block'}
    ),

    html.Br(),

    html.I("The file name of the images must be without spaces"),
    # Upload area for the images (several files at once).
    dcc.Upload(
        id='upload-image',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select the images')
        ]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        # Allow multiple files to be uploaded
        multiple=True
    ),

    # Upload area for the optional template.
    dcc.Upload(
        id='upload-template',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select your template')
        ]),
        style={
            'width': '100%',
            'height': '40px',
            'lineHeight': '40px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
    ),

    html.Br(),

    # Spinner whose child is updated by the template-upload callback.
    dcc.Loading(
        id="loading",
        type="default",
        children=html.Div(id="loading-output")
    ),

    html.Div(id='template-message'),

    html.Br(),

    # Editable table of pictures and descriptions; rows can be deleted.
    dash_table.DataTable(style_data={'whiteSpace': 'normal'}, 
        id='table',
        fixed_rows={'headers': True},
        style_table={'height': 'auto'}, 
        sort_action='native',
        editable=True, 
        row_deletable=True,
        style_cell={'textAlign': 'left'}),
    
    html.Br(),

    html.Div(id='removed-image-by-row'),
    
    html.Br(),
    
    html.Button('Clean uploading', id='clean-uploading'),
    html.Button('Generate Report', id='generate-report'),

    html.Br(),

    html.H4("Report to download"),
    html.Ul(id="download-link"),

    # debugging components
    # this stores the descriptions
    # html.Div(id='container', hidden='true')
    dcc.Store(id='store-description'),
    # flag set by the template-upload callback; starts as False
    dcc.Store(id='template-uploaded', data=False),

    # this is to visualize output in callbacks
    # html.P(id='output'),
    html.P(id='log'),
])

@app.callback(
    Output('removed-image-by-row', 'children'),
    [Input("table", "data_previous")],
    [State("table", "data")])
def remove_image_when_row_is_removed(data_previous, data_current):
    """Remove the image if the row is removed.

    Args:
        data_previous: table rows before the change (``None`` initially).
        data_current: table rows after the change.

    Returns:
        A ``Div`` naming the removed image(s).

    Raises:
        dash.exceptions.PreventUpdate: When nothing was removed, e.g. the
            change was a cell edit rather than a row deletion.
    """
    if data_previous is None:
        raise dash.exceptions.PreventUpdate()

    current_rows = data_current or []
    # Only a shrinking table means a row was deleted; an edited cell (even
    # an edited picture name) must never delete a file.
    if len(current_rows) >= len(data_previous):
        raise dash.exceptions.PreventUpdate()

    remaining = {row.get('PictureName') for row in current_rows}
    removed = sorted(
        {row.get('PictureName') for row in data_previous} - remaining - {None}
    )
    if not removed:
        raise dash.exceptions.PreventUpdate()

    for image_name in removed:
        try:
            os.remove(os.path.join(UPLOAD_IMAGES, image_name))
        except FileNotFoundError:
            # Already gone (e.g. the upload folder was cleaned before).
            pass

    return html.Div(["Image removed: ", html.B(children=', '.join(removed))])

@app.callback(Output("loading-output", "children"), Input("upload-template", "contents"))
def input_triggers_spinner(value):
    """Keep the loading spinner visible for a second after a template upload.

    Returns ``value`` (the uploaded template contents) unchanged.
    """
    # Imported here because ``time`` is not among the notebook's top-level
    # imports; without it this callback fails with a NameError.
    import time

    # NOTE(review): the returned value becomes the child of "loading-output",
    # so the raw upload contents may end up rendered on the page - confirm
    # that this is intended.
    time.sleep(1)
    return value

@app.callback(
    Output('template-message', 'children'),
    Output('template-uploaded', 'data'),
    [Input("upload-template", "filename"), 
    Input("upload-template", "contents")],
    State('report-format', 'value'),
    prevent_initial_call=True)
def store_template(uploaded_filename, uploaded_file_content, report_file_type):
    """Store the template into the template folder.

    Args:
        uploaded_filename: name of the uploaded template file.
        uploaded_file_content: contents of the uploaded template file.
        report_file_type: selected report format (``'doc'`` or ``'ppt'``).

    Returns:
        A message ``Div`` and the value of the ``template-uploaded`` flag.
        The message is red when the template's extension does not match
        the selected report format.

    Raises:
        dash.exceptions.PreventUpdate: When there is nothing to store.
    """
    if not uploaded_filename or not uploaded_file_content:
        raise dash.exceptions.PreventUpdate()

    save_file(uploaded_filename, uploaded_file_content, UPLOAD_TEMPLATE)

    # Compare against the extension ("docx"/"pptx") instead of the whole
    # name, so that e.g. "mydoc.pptx" is not accepted as a doc template.
    extension = os.path.splitext(uploaded_filename)[1].lower().lstrip('.')
    if not extension.startswith(report_file_type):
        message = 'Uploaded template and report format are not the same, please switch the type of report using the toggle above.'
        # The template is stored anyway, so the flag stays True: the user
        # only has to switch the report format.
        return html.Div([html.P(children=message, style={'color':'red'})]), True

    message = f'{uploaded_filename} uploaded'
    return html.Div([html.P(children=message)]), True

@app.callback(
    Output('table', 'data'),
    Output('table', 'columns'),
    [Input("upload-image", "filename"), 
    Input("upload-image", "contents")],
    State('store-description', 'data'),
    prevent_initial_call=True)
def generate_table(uploaded_filenames, uploaded_file_contents, data_stored):
    """Create the table based on the uploaded images.

    The uploaded images are saved first. Rows already kept in the store are
    reused with their descriptions; every other image found in the images
    folder gets a row with the placeholder text ``'Modify Description'``.

    Returns:
        The table rows and the column definitions. If anything fails, a
        single-column table showing the error message is returned instead.
    """
    try:
        for name, content in zip(uploaded_filenames or [], uploaded_file_contents or []):
            save_file(name, content, UPLOAD_IMAGES)

        image_names = []
        descriptions = []
        # An empty store (None or no rows) has no columns to read from.
        if data_stored:
            df_stored = pd.DataFrame.from_dict(data_stored)
            image_names = df_stored["PictureName"].tolist()
            descriptions = df_stored["Description"].tolist()

        known_names = set(image_names)
        for uploaded_file in uploaded_files(UPLOAD_IMAGES):
            if uploaded_file.name in known_names:
                continue
            image_names.append(uploaded_file.name)
            descriptions.append('Modify Description')

        df = pd.DataFrame({"PictureName": image_names, "Description": descriptions})
        return df.to_dict('records'), [{"name": i, "id": i} for i in df.columns]

    except Exception as e:
        # Callback boundary: show the failure in the table instead of
        # raising a second error from the handler itself.
        return [{"Error": str(e)}], [{"name": "Error", "id": "Error"}]

@app.callback(
    Output('store-description', 'data'),
    [Input('table', 'data_timestamp')],
    [State('table', 'data'), 
    State('table', 'columns')],
    prevent_initial_call=True)
def store_data_from_table(time_stamp, rows, columns):
    """Copy the current table rows into the description store.

    Only the columns listed in ``columns`` (by their ``name``) are kept;
    the rows are returned as a list of per-row dictionaries.
    """
    column_names = [column['name'] for column in columns]
    table_frame = pd.DataFrame(rows, columns=column_names)
    return table_frame.to_dict('records')

@app.callback(
    Output('store-description', 'clear_data'),
    Input("clean-uploading", "n_clicks"),
    prevent_initial_call=True)
def clean_upload_folder(n_clicks):
    """Delete every uploaded image and ask the description store to clear.

    Returns ``True``, which is written to the store's ``clear_data``.
    """
    for image in uploaded_files(UPLOAD_IMAGES):
        os.remove(image.dir)
    return True

@app.callback(
    Output("download-link", "children"),
    Output("log", "children"),
    Input("generate-report", "n_clicks"),
    [State("report-name", "value"),  
    State("image-resource", "value"),  
    State('table', 'data'),
    State('table', 'columns'),
    State('report-format', 'value'),
    State('template-uploaded', 'data')],
    prevent_initial_call=True)
def report_generator(n_clicks, report_name, image_resource, rows, columns, report_format, is_template_uploaded):
    """Generate the report and refresh the list of download links.

    When a report with the same name already exists in the reports folder,
    the new file gets the next ``-versionNN`` suffix instead of overwriting
    an earlier one.

    Returns:
        The download links for every stored report and the message to show
        in the log (the generator's result, or a hint when no table is
        loaded yet).
    """
    if not rows or not columns:
        # Nothing uploaded yet: keep the links and explain what is missing.
        links = [html.Li(file_download_link(existing.name)) for existing in uploaded_files(UPLOAD_REPORTS)]
        return links, 'Please upload at least one image before generating the report.'

    df = pd.DataFrame(rows, columns=[c['name'] for c in columns])

    report_name = _next_report_name(report_name, os.listdir(UPLOAD_REPORTS))

    report = report_factory(is_template_uploaded, report_format, report_name, image_resource, df)
    result = report.generate_report()

    links = [html.Li(file_download_link(generated.name)) for generated in uploaded_files(UPLOAD_REPORTS)]
    return links, result


def _next_report_name(report_name, existing_files):
    """Return ``report_name``, versioned if that report already exists.

    Looks at the ``.docx``/``.pptx`` entries of ``existing_files`` named
    exactly ``report_name`` or ``report_name-version<digits>`` and returns
    ``report_name-versionNN`` with NN one above the highest version found
    (the unversioned file counts as version 0). The result does not depend
    on the order of ``existing_files``.
    """
    prefix = f'{report_name}-version'
    versions = []
    for file_name in existing_files:
        stem, extension = os.path.splitext(file_name)
        if extension not in ('.docx', '.pptx'):
            continue
        if stem == report_name:
            versions.append(0)
        elif stem.startswith(prefix) and stem[len(prefix):].isdigit():
            versions.append(int(stem[len(prefix):]))

    if not versions:
        return report_name
    return "{}-version{:02d}".format(report_name, max(versions) + 1)

# Start the app from the notebook; 'inline' is presumably JupyterDash's
# display mode (render inside the cell output) - TODO confirm.
app.run_server('inline')

<IPython.core.display.Javascript object>