From e52cc8af01501daa7fcb646e0dcd76ee05867c5d Mon Sep 17 00:00:00 2001 From: Lino Galiana Date: Tue, 19 Dec 2023 21:40:01 +0100 Subject: [PATCH] Automatic black formatting for python examples (#477) * black extension * clean * Add a few extensions * script * append to script * write files * update * update * update * remove files * append * update * retour bons fichiers * up * viewof * reorder * update --- .github/workflows/netlify-test.yaml | 1 + .github/workflows/prod.yml | 2 +- .../linogaliana/pip-freeze/_extension.yml | 8 + .../linogaliana/pip-freeze/pip-freeze.lua | 28 +++ .../linogaliana/rev-history/_extension.yml | 8 + .../linogaliana/rev-history/rev-history.lua | 45 +++++ .../black-formatter/_extension.yml | 8 + .../black-formatter/black-formatter.lua | 99 ++++++++++ _quarto.yml | 4 +- build/_handle_git_history.qmd | 113 ++++++++++++ build/_meta_info.qmd | 174 ++++++++++++++++++ build/append_environment.py | 29 +++ content/modelisation/index.qmd | 10 +- requirements.txt | 3 +- 14 files changed, 526 insertions(+), 6 deletions(-) create mode 100644 _extensions/linogaliana/pip-freeze/_extension.yml create mode 100644 _extensions/linogaliana/pip-freeze/pip-freeze.lua create mode 100644 _extensions/linogaliana/rev-history/_extension.yml create mode 100644 _extensions/linogaliana/rev-history/rev-history.lua create mode 100644 _extensions/shafayetShafee/black-formatter/_extension.yml create mode 100644 _extensions/shafayetShafee/black-formatter/black-formatter.lua create mode 100644 build/_handle_git_history.qmd create mode 100644 build/_meta_info.qmd create mode 100644 build/append_environment.py diff --git a/.github/workflows/netlify-test.yaml b/.github/workflows/netlify-test.yaml index b7ead9ecf..22f05c3b1 100644 --- a/.github/workflows/netlify-test.yaml +++ b/.github/workflows/netlify-test.yaml @@ -23,6 +23,7 @@ jobs: run: | quarto --version git diff --name-only origin/master origin/${GITHUB_HEAD_REF} >> diff + python build/append_environment.py python build/tweak_render.py cat _quarto.yml quarto render --to html diff --git a/.github/workflows/prod.yml b/.github/workflows/prod.yml index 4c4ea4b2f..187eabe31 100644 --- a/.github/workflows/prod.yml +++ b/.github/workflows/prod.yml @@ -5,7 +5,6 @@ on: branches: - main - master - - docker jobs: docker: @@ -57,6 +56,7 @@ jobs: run: git config --global --add safe.directory /__w/python-datascientist/python-datascientist - name: Render website run: | + python build/append_environment.py quarto render --to html - name: Publish to Pages if: github.ref == 'refs/heads/master' diff --git a/_extensions/linogaliana/pip-freeze/_extension.yml b/_extensions/linogaliana/pip-freeze/_extension.yml new file mode 100644 index 000000000..d3b73fbac --- /dev/null +++ b/_extensions/linogaliana/pip-freeze/_extension.yml @@ -0,0 +1,8 @@ +title: pip-freeze +author: Lino Galiana +version: 0.1.0 +quarto-required: ">=1.3.0" +contributes: + shortcodes: + - pip-freeze.lua + diff --git a/_extensions/linogaliana/pip-freeze/pip-freeze.lua b/_extensions/linogaliana/pip-freeze/pip-freeze.lua new file mode 100644 index 000000000..16a9d6ebf --- /dev/null +++ b/_extensions/linogaliana/pip-freeze/pip-freeze.lua @@ -0,0 +1,28 @@ +-- run pip and read its output +local function pip(command) + local p = io.popen("pip " .. command) + local output = "" + if p ~= nil then + output = p:read('*a') -- Use '*a' to read the entire output + p:close() + end + return output +end + +return { + ["pip-freeze"] = function(args, kwargs) + local raw_cmd = "list" + local piplist = pip(raw_cmd) + + -- return as string + if piplist ~= nil and piplist ~= "" then + return pandoc.read( + "
\n" .. + piplist .. "
" + ).blocks + else + return "nothing returned" + --return pandoc.Null() + end + end +} diff --git a/_extensions/linogaliana/rev-history/_extension.yml b/_extensions/linogaliana/rev-history/_extension.yml new file mode 100644 index 000000000..be3c8fa80 --- /dev/null +++ b/_extensions/linogaliana/rev-history/_extension.yml @@ -0,0 +1,8 @@ +title: rev-history +author: Lino Galiana +version: 0.1.0 +quarto-required: ">=1.3.0" +contributes: + shortcodes: + - rev-history.lua + diff --git a/_extensions/linogaliana/rev-history/rev-history.lua b/_extensions/linogaliana/rev-history/rev-history.lua new file mode 100644 index 000000000..20f9269c3 --- /dev/null +++ b/_extensions/linogaliana/rev-history/rev-history.lua @@ -0,0 +1,45 @@ +-- run git and read its output +local function git(command) + local p = io.popen("git " .. command) + local output = "" + if p ~= nil then + output = p:read('*all') + p:close() + end + return output +end + +local github_repo = "https://github.com/linogaliana/python-datascientist/commit" + +-- return a table containing shortcode definitions +-- defining shortcodes this way allows us to create helper +-- functions that are not themselves considered shortcodes +return { + ["rev-history"] = function(args, kwargs) + local header = "" .. + "SHA" .. + "Date" .. + "Author" .. + "Description" .. + "\n" + local divider = "" + + -- run the command + local filename = quarto.doc.input_file + local raw_cmd = "log --follow --pretty=format:\"[%h]($repo/%h)%ad%an%s\" --date=format:'%Y-%m-%d %H:%M:%S' -- " + local raw_cmd_sub = string.gsub(raw_cmd, "$repo", github_repo) + local cmd = raw_cmd_sub .. filename + local tags = git(cmd) + + -- return as string + if tags ~= nil then + return pandoc.read( + "" .. + header .. divider .. tags .. + "
\n\n" + ).blocks + else + return pandoc.Null() + end + end +} diff --git a/_extensions/shafayetShafee/black-formatter/_extension.yml b/_extensions/shafayetShafee/black-formatter/_extension.yml new file mode 100644 index 000000000..fa4130810 --- /dev/null +++ b/_extensions/shafayetShafee/black-formatter/_extension.yml @@ -0,0 +1,8 @@ +title: Black-formatter +author: Shafayet Khan Shafee +version: 1.1.1 +quarto-required: ">=1.2.0" +contributes: + filters: + - black-formatter.lua + diff --git a/_extensions/shafayetShafee/black-formatter/black-formatter.lua b/_extensions/shafayetShafee/black-formatter/black-formatter.lua new file mode 100644 index 000000000..4a19c9cd3 --- /dev/null +++ b/_extensions/shafayetShafee/black-formatter/black-formatter.lua @@ -0,0 +1,99 @@ +--[[ +MIT License + +Copyright (c) 2023 Shafayet Khan Shafee + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +]]-- + + +local function isEmpty(s) + return s == nil or s == '' +end + + +local function add_codeblock_label(label) + local labeller = { + -- Add id for code block using the id from div.cell + CodeBlock = function(cb) + if cb.classes:includes('python') then + cb.identifier = label + return cb + end + end + } + return labeller +end + + +local function label_codeblock() + local labelled_cb = { + Div = function(el) + if el.classes:includes("cell") then + local label = el.identifier + return el:walk(add_codeblock_label(label)) + end + end + } + return labelled_cb +end + + +local function black_format() + local black_formatter = { + CodeBlock = function(cb) + if cb.classes:includes('python') then + local randName = string.char( + math.random(97,122), math.random(97,122), + math.random(97,122), math.random(97,122), + math.random(97,122), math.random(97,122) + ) + + local label = cb.identifier + local name + + if isEmpty(label) then + name = "_" .. randName .. "_black_formatted.py" + else + name = "_" .. label .. "_black_formatted.py" + end + + local f = io.open(name, 'w+b') + f:write(cb.text) + f:close() + local formatted = io.popen("python -m black " .. name) + formatted:close() + local formatted_file = io.open(name, 'r') + local formatted_content = formatted_file:read("*all") + formatted_file:close() + os.remove(name) + cb.text = formatted_content + return cb + end + end + } + return black_formatter +end + + +function Pandoc(doc) + local doc = doc:walk(label_codeblock()) + return doc:walk(black_format()) +end + diff --git a/_quarto.yml b/_quarto.yml index 18ffc0e91..0a49162d4 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -192,7 +192,9 @@ website: filters: - - lightbox + - lightbox + - black-formatter + lightbox: auto format: diff --git a/build/_handle_git_history.qmd b/build/_handle_git_history.qmd new file mode 100644 index 000000000..eb51ee382 --- /dev/null +++ b/build/_handle_git_history.qmd @@ -0,0 +1,113 @@ +```{ojs} +//| echo: false +Inputs.table( + table_commit, + { + format: { + SHA: x => md`[${x}](${github_repo}/commit/${x})`, + Description: x => md`${replacePullRequestPattern(x, github_repo)}`, + /*Date: x => x.toLocaleString("fr", { + "month": "numeric", + "day": "numeric", + "year": "numeric" + }) + */ + } + } +) +``` + +```{ojs} +//| echo: false +Plot.plot({ + marks: [ + Plot.ruleY([0], {stroke: "royalblue"}), + Plot.dot( + table_commit, + Plot.pointerX({x: (d) => new Date(d.date), y: 0, stroke: "red"})), + Plot.dot(table_commit, {x: (d) => new Date(d.Date), y: 0, fill: "royalblue"}) + ] +}) +``` + + +```{ojs} +//| echo: false +function replacePullRequestPattern(inputString, githubRepo) { + // Use a regular expression to match the pattern #digit + var pattern = /#(\d+)/g; + + // Replace the pattern with ${github_repo}/pull/#digit + var replacedString = inputString.replace(pattern, '[#$1](' + githubRepo + '/pull/$1)'); + + return replacedString; +} +``` + + +```{ojs} +//| echo: false +github_repo = "https://github.com/linogaliana/python-datascientist" +``` + +```{ojs} +//| echo: false +table_commit = { + +// Get the HTML table by its class name +var table = document.querySelector('.commit-table'); + +// Check if the table exists +if (table) { + // Initialize an array to store the table data + var dataArray = []; + + // Extract headers from the first row + var headers = []; + for (var i = 0; i < table.rows[0].cells.length; i++) { + headers.push(table.rows[0].cells[i].textContent.trim()); + } + + // Iterate through the rows, starting from the second row + for (var i = 1; i < table.rows.length; i++) { + var row = table.rows[i]; + var rowData = {}; + + // Iterate through the cells in the row + for (var j = 0; j < row.cells.length; j++) { + // Use headers as keys and cell content as values + rowData[headers[j]] = row.cells[j].textContent.trim(); + } + + // Push the rowData object to the dataArray + dataArray.push(rowData); + } + } + + return dataArray + +} +``` + + +```{ojs} +//| echo: false +//| output: false + +// Get the element with class 'git-details' +{ + var gitDetails = document.querySelector('.git-details'); + + // Check if the element exists + if (gitDetails) { + // Hide the element + gitDetails.style.display = 'none'; + } +} +``` + +```{ojs} +//| echo: false +Plot = require('@observablehq/plot@0.6.12/dist/plot.umd.min.js') +``` + diff --git a/build/_meta_info.qmd b/build/_meta_info.qmd new file mode 100644 index 000000000..6d1450387 --- /dev/null +++ b/build/_meta_info.qmd @@ -0,0 +1,174 @@ +## Informations additionnelles {-} + + +
+ + +{{< fa brands python >}} environment files have been tested on. + + + +```{python} +#| echo: false +#| output: asis +from datetime import datetime +today = datetime.today().strftime('%Y-%m-%d') +print( + f"Latest built version: {today}" +) +``` + +Python version used: + +```{python} +#| echo: false +import sys +sys.version +``` + + +{{< pip-freeze >}} + + +
+ + +
+ + +View file history {{< fa brands git-alt >}} + + + +```{ojs} +//| echo: false +html`
${git_history_table}
` +``` + +```{ojs} +//| echo: false +html`
${git_history_plot}
` +``` + +{{< rev-history >}} + + +
+ +```{ojs} +//| echo: false +//| output: false +git_history_table = Inputs.table( + table_commit, + { + format: { + SHA: x => md`[${x}](${github_repo}/commit/${x})`, + Description: x => md`${replacePullRequestPattern(x, github_repo)}`, + /*Date: x => x.toLocaleString("fr", { + "month": "numeric", + "day": "numeric", + "year": "numeric" + }) + */ + } + } +) +``` + +```{ojs} +//| echo: false +//| output: false +git_history_plot = Plot.plot({ + marks: [ + Plot.ruleY([0], {stroke: "royalblue"}), + Plot.dot( + table_commit, + Plot.pointerX({x: (d) => new Date(d.date), y: 0, stroke: "red"})), + Plot.dot(table_commit, {x: (d) => new Date(d.Date), y: 0, fill: "royalblue"}) + ] +}) +``` + + +```{ojs} +//| echo: false +function replacePullRequestPattern(inputString, githubRepo) { + // Use a regular expression to match the pattern #digit + var pattern = /#(\d+)/g; + + // Replace the pattern with ${github_repo}/pull/#digit + var replacedString = inputString.replace(pattern, '[#$1](' + githubRepo + '/pull/$1)'); + + return replacedString; +} +``` + + +```{ojs} +//| echo: false +github_repo = "https://github.com/linogaliana/python-datascientist" +``` + +```{ojs} +//| echo: false +table_commit = { + +// Get the HTML table by its class name +var table = document.querySelector('.commit-table'); + +// Check if the table exists +if (table) { + // Initialize an array to store the table data + var dataArray = []; + + // Extract headers from the first row + var headers = []; + for (var i = 0; i < table.rows[0].cells.length; i++) { + headers.push(table.rows[0].cells[i].textContent.trim()); + } + + // Iterate through the rows, starting from the second row + for (var i = 1; i < table.rows.length; i++) { + var row = table.rows[i]; + var rowData = {}; + + // Iterate through the cells in the row + for (var j = 0; j < row.cells.length; j++) { + // Use headers as keys and cell content as values + rowData[headers[j]] = row.cells[j].textContent.trim(); + } + + // Push the rowData object to the dataArray + dataArray.push(rowData); + } + } + + return dataArray + +} +``` + + +```{ojs} +//| echo: false +//| output: false + +// Get the element with class 'git-details' +{ + var gitDetails = document.querySelector('.git-details'); + + // Check if the element exists + if (gitDetails) { + // Hide the element + gitDetails.style.display = 'none'; + } +} +``` + +```{ojs} +//| echo: false +Plot = require('@observablehq/plot@0.6.12/dist/plot.umd.min.js') +``` + + + diff --git a/build/append_environment.py b/build/append_environment.py new file mode 100644 index 000000000..4005f4a54 --- /dev/null +++ b/build/append_environment.py @@ -0,0 +1,29 @@ +import yaml +from tweak_markdown import read_file, write_file + + +def process_quarto_config(quarto_config_file): + with open(quarto_config_file, "r", encoding="utf-8") as yml_file: + quarto_config = yaml.safe_load(yml_file) + + render_files = quarto_config.get("project", {}).get("render", []) + + return render_files + + +def append_meta_to_file(filename): + qmd_files = [filename, "build/_meta_info.qmd"] + + combined_content = [read_file(qmd_file) for qmd_file in qmd_files] + + combined_content = "\n\n".join(combined_content) + + write_file(filename, combined_content) + + +list_qmd = process_quarto_config("_quarto.yml") +list_qmd = [fileqmd for fileqmd in list_qmd if fileqmd != "index.qmd"] + +if __name__ == "__main__": + for files in list_qmd: + append_meta_to_file(files) diff --git a/content/modelisation/index.qmd b/content/modelisation/index.qmd index 54b16e9ee..f6920ce9b 100644 --- a/content/modelisation/index.qmd +++ b/content/modelisation/index.qmd @@ -306,7 +306,8 @@ taux de chômage, variables d'éducation) proviennent de l'USDA ([source](https: Le code pour construire une base unique à partir de ces sources diverses est disponible ci-dessous : -```{python class.output = "python"} +::: {.python} +```{python} #| echo: false with open('get_data.py', 'r') as f: @@ -315,9 +316,11 @@ with open('get_data.py', 'r') as f: print(line, end='') ``` +::: + Cette partie n'est absolument pas exhaustive. Elle constitue un point d'entrée dans le sujet à partir d'une série d'exemples sur un fil rouge. -De nombreux modèles plus appronfondis, que ce soit en économétrie ou en _machine learning_ +De nombreux modèles plus approfondis, que ce soit en économétrie ou en _machine learning_ mériteraient d'être évoqués. Pour les personnes désirant en savoir plus sur les modèles économétriques, qui seront moins évoqués que ceux de _machine learning_, je recommande la lecture de @Turrell2021. @@ -327,4 +330,5 @@ je recommande la lecture de @Turrell2021. ## Références ::: {#refs} -::: \ No newline at end of file +::: + diff --git a/requirements.txt b/requirements.txt index 4fcef06c1..7548b1399 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,5 @@ scikit-image webdriver-manager spacy gensim -duckdb \ No newline at end of file +duckdb +black \ No newline at end of file