Skip to content

Commit

Permalink
Move to pandoc for rendering sponsorship contracts (#2343)
Browse files Browse the repository at this point in the history
* Move to pandoc for rendering sponsorship contracts

* use renewal template if applicable

* consistency

* formatting tweaks

* bit cleaner names for files

* grinding out formatting
  • Loading branch information
ewdurbin committed Feb 21, 2024
1 parent c1b800b commit a4990b7
Show file tree
Hide file tree
Showing 20 changed files with 605 additions and 483 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ jobs:
steps:
- name: Check out repository
uses: actions/checkout@v2
- name: Install platform dependencies
run: |
sudo apt -y update
sudo apt -y install --no-install-recommends \
texlive-latex-base \
texlive-latex-recommended \
texlive-plain-generic \
lmodern
- name: Install pandoc
run: |
wget https://github.com/jgm/pandoc/releases/download/2.17.1.1/pandoc-2.17.1.1-1-amd64.deb
sudo dpkg -i pandoc-2.17.1.1-1-amd64.deb
- uses: actions/setup-python@v2
with:
python-version: 3.9.16
Expand Down
Empty file added Aptfile
Empty file.
42 changes: 40 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,47 @@
FROM python:3.9-bullseye
FROM python:3.9-bookworm
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# By default, Docker has special steps to avoid keeping APT caches in the layers, which
# is good, but in our case, we're going to mount a special cache volume (kept between
# builds), so we WANT the cache to persist.
RUN set -eux; \
rm -f /etc/apt/apt.conf.d/docker-clean; \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache;

# Install system-level build requirements. This is done before
# everything else because these rarely change.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
set -x \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
pandoc \
texlive-latex-base \
texlive-latex-recommended \
texlive-fonts-recommended \
texlive-plain-generic \
lmodern

RUN case $(uname -m) in \
"x86_64") ARCH=amd64 ;; \
"aarch64") ARCH=arm64 ;; \
esac \
&& wget --quiet https://github.com/jgm/pandoc/releases/download/2.17.1.1/pandoc-2.17.1.1-1-${ARCH}.deb \
&& dpkg -i pandoc-2.17.1.1-1-${ARCH}.deb

RUN mkdir /code
WORKDIR /code

COPY dev-requirements.txt /code/
COPY base-requirements.txt /code/
RUN pip install -r dev-requirements.txt

RUN pip --no-cache-dir --disable-pip-version-check install --upgrade pip setuptools wheel

RUN --mount=type=cache,target=/root/.cache/pip \
set -x \
&& pip --disable-pip-version-check \
install \
-r dev-requirements.txt

COPY . /code/
7 changes: 3 additions & 4 deletions base-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,11 @@ django-filter==2.4.0
django-ordered-model==3.4.3
django-widget-tweaks==1.4.8
django-countries==7.2.1
xhtml2pdf==0.2.5
django-easy-pdf3==0.1.2
num2words==0.5.10
django-polymorphic==3.0.0
sorl-thumbnail==12.7.0
docxtpl==0.12.0
reportlab==3.6.6
django-extensions==3.1.4
django-import-export==2.7.1

pypandoc==1.12
panflute==2.3.0
1 change: 0 additions & 1 deletion pydotorg/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@
'ordered_model',
'widget_tweaks',
'django_countries',
'easy_pdf',
'sorl.thumbnail',

'banners',
Expand Down
89 changes: 89 additions & 0 deletions sponsors/contracts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
import tempfile

from django.http import HttpResponse
from django.template.loader import render_to_string
from django.utils.dateformat import format

import pypandoc

# Directory containing this module; the bundled pandoc assets below are
# resolved relative to it.
dirname = os.path.dirname(__file__)
# Path of the pandoc filter script handed to pypandoc through ``filters=``
# when rendering DOCX output.
DOCXPAGEBREAK_FILTER = os.path.join(dirname, "pandoc_filters/pagebreak.py")
# Path of the DOCX file passed to pandoc's ``--reference-doc`` option.
REFERENCE_DOCX = os.path.join(dirname, "reference.docx")


def _clean_split(text, separator="\n"):
return [
t.replace("-", "").strip()
for t in text.split("\n")
if t.replace("-", "").strip()
]


def _contract_context(contract, **context):
    """Build the template context used to render ``contract``.

    Extra keyword arguments seed the context; the contract-derived keys set
    here take precedence over them.

    Returns:
        The context dict, including the contract, its sponsorship and
        sponsor, the start date with its English ordinal suffix, the cleaned
        benefit and legal-clause lines, and the previous effective date
        (``"UNKNOWN"`` placeholders when the sponsorship has none).
    """
    sponsorship = contract.sponsorship
    start = sponsorship.start_date

    context["contract"] = contract
    context["start_date"] = start
    # "S" is the English ordinal suffix of the day of month (st/nd/rd/th).
    context["start_day_english_suffix"] = format(start, "S")
    context["sponsor"] = sponsorship.sponsor
    context["sponsorship"] = sponsorship
    context["benefits"] = _clean_split(contract.benefits_list.raw)
    context["legal_clauses"] = _clean_split(contract.legal_clauses.raw)

    previous = sponsorship.previous_effective_date
    if previous:
        context["previous_effective"] = previous
        context["previous_effective_english_suffix"] = format(previous, "S")
    else:
        context["previous_effective"] = "UNKNOWN"
        context["previous_effective_english_suffix"] = "UNKNOWN"
    return context


def render_markdown_from_template(contract, **context):
    """Render the contract's Markdown source from the matching template.

    Renewal sponsorships use the renewal agreement template; all others use
    the standard sponsorship agreement template.
    """
    template_name = (
        "sponsors/admin/contracts/renewal-agreement.md"
        if contract.sponsorship.renewal
        else "sponsors/admin/contracts/sponsorship-agreement.md"
    )
    return render_to_string(template_name, _contract_context(contract, **context))


def render_contract_to_pdf_response(request, contract, **context):
    """Return an ``HttpResponse`` whose body is the contract rendered as PDF.

    ``request`` is accepted but not used by this function.
    """
    pdf_bytes = render_contract_to_pdf_file(contract, **context)
    return HttpResponse(pdf_bytes, content_type="application/pdf")


def render_contract_to_pdf_file(contract, **context):
    """Render ``contract`` to PDF via pandoc and return the file's bytes.

    The Markdown produced by ``render_markdown_from_template`` is converted
    by pypandoc into a temporary ``.pdf`` file, which is read back and
    removed when the context manager exits.

    Returns:
        The PDF document as ``bytes``.
    """
    markdown = render_markdown_from_template(contract, **context)
    # Only one temporary file is needed: the previous outer DOCX temp file
    # was opened but never used.
    with tempfile.NamedTemporaryFile(suffix=".pdf") as pdf_file:
        # pandoc writes the result to ``outputfile``; the return value of
        # convert_text is not needed.
        pypandoc.convert_text(
            markdown, "pdf", outputfile=pdf_file.name, format="md"
        )
        return pdf_file.read()


def render_contract_to_docx_response(request, contract, **context):
    """Return an ``HttpResponse`` that downloads the contract as a DOCX file.

    The attachment filename is ``sponsorship-renewal-<sponsor>.docx`` for
    renewals and ``sponsorship-contract-<sponsor>.docx`` otherwise, where
    ``<sponsor>`` is the sponsor name with spaces turned into dashes and
    periods removed. ``request`` is accepted but not used.
    """
    docx_bytes = render_contract_to_docx_file(contract, **context)
    response = HttpResponse(
        docx_bytes,
        content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )

    sponsorship = contract.sponsorship
    prefix = "sponsorship-renewal" if sponsorship.renewal else "sponsorship-contract"
    sponsor_slug = sponsorship.sponsor.name.replace(" ", "-").replace(".", "")
    response["Content-Disposition"] = f"attachment; filename={prefix}-{sponsor_slug}.docx"
    return response


def render_contract_to_docx_file(contract, **context):
    """Render ``contract`` to DOCX via pandoc and return the file's bytes.

    The Markdown produced by ``render_markdown_from_template`` is converted
    by pypandoc using the page-break filter and the reference document
    configured at module level. Output goes to a temporary file that is read
    back and removed when the context manager exits.

    Returns:
        The DOCX document as ``bytes``.
    """
    markdown = render_markdown_from_template(contract, **context)
    with tempfile.NamedTemporaryFile() as docx_file:
        # pandoc writes the result to ``outputfile``; the return value of
        # convert_text is not needed.
        pypandoc.convert_text(
            markdown,
            "docx",
            outputfile=docx_file.name,
            format="md",
            filters=[DOCXPAGEBREAK_FILTER],
            # Plain string: the former f-string had no placeholders.
            extra_args=["--reference-doc", REFERENCE_DOCX],
        )
        return docx_file.read()
Empty file.
90 changes: 90 additions & 0 deletions sponsors/pandoc_filters/pagebreak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ------------------------------------------------------------------------------
# Source: https://github.com/pandocker/pandoc-docx-pagebreak-py/
# Revision: c8cddccebb78af75168da000a3d6ac09349bef73
# ------------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2018 pandocker
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ------------------------------------------------------------------------------

""" pandoc-docx-pagebreakpy
Pandoc filter to insert pagebreak as openxml RawBlock
Only for docx output
Trying to port pandoc-docx-pagebreak
- https://github.com/alexstoick/pandoc-docx-pagebreak
"""

import panflute as pf


class DocxPagebreak:
    """Panflute filter that swaps ``\\newpage`` / ``\\toc`` raw blocks for OpenXML.

    The replacements only apply when the output format is ``docx``.
    """

    # Raw OpenXML paragraph containing a single page break.
    pagebreak = pf.RawBlock('<w:p><w:r><w:br w:type="page" /></w:r></w:p>', format="openxml")
    # Raw OpenXML paragraph starting a new section on the next page.
    # Defined here but not used by ``action`` (``\newsection`` handling is
    # disabled).
    sectionbreak = pf.RawBlock(
        '<w:p><w:pPr><w:sectPr><w:type w:val="nextPage" /></w:sectPr></w:pPr></w:p>',
        format="openxml",
    )
    # Raw OpenXML structured-document tag holding a TOC field.
    toc = pf.RawBlock(r"""
<w:sdt>
<w:sdtContent xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:p>
<w:r>
<w:fldChar w:fldCharType="begin" w:dirty="true" />
<w:instrText xml:space="preserve">TOC \o "1-3" \h \z \u</w:instrText>
<w:fldChar w:fldCharType="separate" />
<w:fldChar w:fldCharType="end" />
</w:r>
</w:p>
</w:sdtContent>
</w:sdt>
""", format="openxml")

    def action(self, elem, doc):
        """Return the replacement for ``elem``, or ``elem`` itself if untouched.

        * ``\\newpage`` becomes a page break for docx output and is left
          as-is for any other format.
        * ``\\toc`` becomes a "Table of Contents" heading plus the TOC field
          for docx output and is removed (empty list) for any other format.
        * Every other element is returned unchanged.
        """
        if not isinstance(elem, pf.RawBlock):
            return elem

        marker = elem.text
        if marker == r"\newpage":
            if doc.format == "docx":
                pf.debug("Page Break")
                return self.pagebreak
            return elem

        if marker == r"\toc":
            if doc.format != "docx":
                return []
            pf.debug("Table of Contents")
            heading_text = pf.Para(
                pf.Str("Table"), pf.Space(), pf.Str("of"), pf.Space(), pf.Str("Contents")
            )
            heading = pf.Div(heading_text, attributes={"custom-style": "TOC Heading"})
            return [heading, self.toc]

        return elem


def main(doc=None):
    """Run the ``DocxPagebreak`` filter through panflute and return its result."""
    return pf.run_filter(DocxPagebreak().action, doc=doc)


# Entry point when this file is executed directly as a script.
if __name__ == "__main__":
    main()
78 changes: 0 additions & 78 deletions sponsors/pdf.py

This file was deleted.

Binary file added sponsors/reference.docx
Binary file not shown.
39 changes: 39 additions & 0 deletions sponsors/tests/test_contracts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from datetime import date
from model_bakery import baker
from unittest.mock import patch, Mock

from django.http import HttpRequest
from django.test import TestCase
from django.utils.dateformat import format

from sponsors.contracts import render_contract_to_docx_response


class TestRenderContract(TestCase):
    """Checks the headers of the DOCX download response for contracts."""

    DOCX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    def setUp(self):
        self.contract = baker.make_recipe(
            "sponsors.tests.empty_contract",
            sponsorship__start_date=date.today(),
        )

    def _render_docx(self, renewal):
        """Render the DOCX response with the sponsorship's renewal flag set."""
        request = Mock(HttpRequest)
        self.contract.sponsorship.renewal = renewal
        return render_contract_to_docx_response(request, self.contract)

    def test_render_response_with_docx_attachment(self):
        """A non-renewal contract downloads as ``sponsorship-contract-*.docx``."""
        response = self._render_docx(renewal=False)

        self.assertEqual(
            response.get("Content-Disposition"),
            "attachment; filename=sponsorship-contract-Sponsor.docx",
        )
        self.assertEqual(response.get("Content-Type"), self.DOCX_CONTENT_TYPE)

    def test_render_renewal_response_with_docx_attachment(self):
        """A renewal contract downloads as ``sponsorship-renewal-*.docx``."""
        response = self._render_docx(renewal=True)

        self.assertEqual(
            response.get("Content-Disposition"),
            "attachment; filename=sponsorship-renewal-Sponsor.docx",
        )
        self.assertEqual(response.get("Content-Type"), self.DOCX_CONTENT_TYPE)

0 comments on commit a4990b7

Please sign in to comment.