Skip to content

Commit 6d37e77

Browse files
authored
Correction des problèmes du notebook Pandas (#653)
* premiers pb * Ajoute shortcode
1 parent 378b872 commit 6d37e77

File tree

6 files changed

+74
-94
lines changed

6 files changed

+74
-94
lines changed
Lines changed: 26 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,30 @@
1-
-- Liste des types de callouts et leur classe associée
2-
local callouts_all = {
3-
caution = "callout-caution",
4-
important = "callout-important",
5-
tip = "callout-tip",
6-
note = "callout-note",
7-
warning = "callout-warning",
8-
exercise = "callout-exercise"
9-
}
10-
11-
-- Fonction pour déterminer le type d’un callout à partir de ses classes
12-
function get_callout_type(div)
13-
for key, class in pairs(callouts_all) do
14-
if div.classes:includes(class) then
15-
return key
16-
end
17-
end
18-
return nil
19-
end
20-
21-
-- Applique le collapse si défini dans les métadonnées
22-
function applyCollapse(div, type, collapse_meta)
23-
if collapse_meta == nil then return div end
24-
25-
local collapse_val = nil
26-
if collapse_meta.all ~= nil then
27-
collapse_val = collapse_meta.all
28-
elseif collapse_meta[type] ~= nil then
29-
collapse_val = collapse_meta[type]
30-
end
31-
32-
if collapse_val ~= nil then
33-
div.attributes["collapse"] = tostring(collapse_val)
34-
end
35-
36-
return div
37-
end
38-
39-
-- Création du callout stylisé
40-
function createCallout(div, type, collapse_meta)
41-
local title = type:gsub("^%l", string.upper)
42-
43-
if div.content[1] and div.content[1].t == "Header" then
44-
title = pandoc.utils.stringify(div.content[1])
45-
div.content:remove(1)
46-
end
47-
48-
-- Appliquer collapse si spécifié
49-
div = applyCollapse(div, type, collapse_meta)
50-
51-
if quarto.doc.is_format("ipynb") then
52-
if type == "exercise" then
53-
type = "tip"
1+
-- filters/warninglang.lua
2+
function Div(el)
3+
-- On ne traite que les divs avec classe "warninglang"
4+
if el.classes:includes("warninglang") then
5+
local filename_relative = quarto.doc.input_file
6+
local html_text
7+
8+
if filename_relative:find("/en/") then
9+
-- version anglaise → lien vers la française
10+
local link = filename_relative:gsub("/en/", "/")
11+
html_text =
12+
"This is the English 🇬🇧 🇺🇸 version of this chapter, " ..
13+
"to see the French version go " ..
14+
"<a href=\"" .. link .. "\">there</a>."
15+
else
16+
-- version française → lien vers l’anglaise
17+
local link = filename_relative:gsub("/content/", "/en/content/")
18+
html_text =
19+
"Ceci est la version française 🇫🇷 de ce chapitre, " ..
20+
"pour voir la version anglaise allez " ..
21+
"<a href=\"" .. link .. "\">ici</a>."
5422
end
5523

56-
local html_start = "<div class=\"callout callout-style-default callout-" .. type .. " callout-titled\">\n" ..
57-
"<div class=\"callout-header d-flex align-content-center\">\n" ..
58-
"<div class=\"callout-icon-container\">\n<i class=\"callout-icon\"></i>\n</div>\n" ..
59-
"<div class=\"callout-title-container flex-fill\">\n" .. title .. "\n</div>\n</div>\n" ..
60-
"<div class=\"callout-body-container callout-body\">\n"
61-
62-
local html_end = "</div>\n</div>"
63-
64-
div.content:insert(1, pandoc.RawBlock("html", html_start))
65-
div.content:insert(pandoc.RawBlock("html", html_end))
66-
67-
return div
68-
else
69-
return quarto.Callout({
70-
type = type,
71-
title = title,
72-
content = div.content,
73-
icon = div.attributes["icon"],
74-
collapse = div.attributes["collapse"] == "true"
75-
})
76-
end
77-
end
78-
79-
-- Hook principal
80-
function Div(div)
81-
local callout_type = get_callout_type(div)
82-
if callout_type ~= nil then
83-
local collapse_meta = quarto.doc and quarto.doc.metadata and quarto.doc.metadata["collapse-callout"]
84-
return createCallout(div, callout_type, collapse_meta)
24+
-- Retourne un bloc callout-note contenant ce texte
25+
return pandoc.Div(
26+
{ pandoc.Para{ pandoc.RawInline("html", html_text) } },
27+
pandoc.Attr("", { "callout-note" })
28+
)
8529
end
8630
end
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
-- filters/warninglang.lua
-- Pandoc/Quarto filter: replaces any div carrying the "warninglang" class
-- with a note callout linking to the other-language version of the chapter.

-- Base URL of the published site, prepended to the computed page path.
local SITE_ROOT = "https://pythonds.linogaliana.fr/"

-- Build the cross-language HTML notice for the given input path.
-- Paths containing "/en/" are treated as the English version and link back to
-- the French page; anything else is assumed French and links to English.
local function cross_language_notice(path)
  if path:find("/en/") then
    local target = path:gsub("/en/", "/")
    return "This is the English 🇬🇧 🇺🇸 version of this chapter, "
      .. "to see the French version go "
      .. '<a href="' .. SITE_ROOT .. target .. '">there</a>.'
  end
  local target = path:gsub("/content/", "/en/content/")
  return "Ceci est la version française 🇫🇷 de ce chapitre, "
    .. "pour voir la version anglaise allez "
    .. '<a href="' .. SITE_ROOT .. target .. '">ici</a>.'
end

-- Entry point invoked by Pandoc for every Div element.
-- Returns a replacement Div for "warninglang" divs, nil (keep as-is) otherwise.
function Div(el)
  if not el.classes:includes("warninglang") then
    return nil
  end

  -- NOTE(review): assumes quarto.doc.input_file is a path relative to the
  -- project root; if it is an absolute filesystem path the generated URL
  -- would be wrong — confirm against the Quarto Lua API.
  local notice = cross_language_notice(quarto.doc.input_file)

  -- Re-wrap the notice in a callout-note div so it renders as a note box.
  return pandoc.Div(
    { pandoc.Para{ pandoc.RawInline("html", notice) } },
    pandoc.Attr("", { "callout", "callout-note" })
  )
end

_quarto.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ project:
44
- index.qmd
55
- 404.qmd
66
- content/getting-started/index.qmd
7-
- content/manipulation/04a_webscraping_TP.qmd
7+
- content/manipulation/01_numpy.qmd
88
- content/modelisation/index.qmd
99
- content/visualisation/index.qmd
1010
- content/annexes/evaluation.qmd
@@ -78,6 +78,8 @@ filters:
7878
path: _extensions/linogaliana/lang-switch/lang-notebook.lua
7979
- at: pre-ast
8080
path: _extensions/linogaliana/callout/callout-notebook.lua
81+
- at: pre-ast
82+
path: _extensions/linogaliana/lang-switch/warninglang.lua
8183
- include-code-files
8284
- build/replace-title.lua
8385
- _extensions/linogaliana/details-iframe/details.lua

content/manipulation/01_numpy.qmd

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ echo: false
1414
printMessage="true"
1515
>}}
1616
17-
::: {.content-visible when-format="ipynb"}
18-
{{warninglang}}
17+
:::: {.content-visible when-format="ipynb"}
18+
::: {.warninglang .callout-warning}
1919
:::
20+
::::
2021

2122
# Introduction
2223

content/manipulation/02_pandas_intro.qmd

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,15 +128,15 @@ l'association de données pour construire des statistiques descriptives.
128128

129129
The `Pandas` package has been the central piece of the data science ecosystem for about a decade. The _DataFrame_, a central object in languages like `R` or `Stata`, had long been absent in the `Python` ecosystem. Yet, thanks to `Numpy`, all the basic components were present but needed to be reconfigured to meet the needs of data scientists.
130130

131-
Wes McKinney, when he built `Pandas` to provide a dataframe leveraging the numerical computation library `Numpy` in the background, enabled a significant leap forward for `Python` in data analysis, explaining its popularity in the data science ecosystem. `Pandas` is not without limitations[^tidyverse], which we will have the opportunity to discuss, but the vast array of analysis methods it offers greatly simplifies data analysis work. For more information on this package, the reference book by @mckinney2012python presents many of the package's features.
131+
Wes McKinney, when he built `Pandas` to provide a dataframe leveraging the numerical computation library `Numpy` in the background, enabled a significant leap forward for `Python` in data analysis, explaining its popularity in the data science ecosystem. `Pandas` is not without limitations[^tidyverse-en], which we will have the opportunity to discuss, but the vast array of analysis methods it offers greatly simplifies data analysis work. For more information on this package, the reference book by @mckinney2012python presents many of the package's features.
132132

133-
[^tidyverse]: The equivalent ecosystem in `R`, the [`tidyverse`](https://www.tidyverse.org/), developed by _Posit_, is of more recent design than `Pandas`. Its philosophy could thus draw inspiration from `Pandas` while addressing some limitations of the `Pandas` syntax. Since both syntaxes are an implementation in `Python` or `R` of the `SQL` philosophy, it is natural that they resemble each other and that it is pertinent for data scientists to know both languages.
133+
[^tidyverse-en]: The equivalent ecosystem in `R`, the [`tidyverse`](https://www.tidyverse.org/), developed by _Posit_, is of more recent design than `Pandas`. Its philosophy could thus draw inspiration from `Pandas` while addressing some limitations of the `Pandas` syntax. Since both syntaxes are an implementation in `Python` or `R` of the `SQL` philosophy, it is natural that they resemble each other and that it is pertinent for data scientists to know both languages.
134134

135135
In this chapter, we will focus on the most relevant elements in the context of an introduction to data science, leaving interested users to deepen their knowledge with the abundant resources available on the subject.
136136

137-
As datasets generally gain value by associating multiple sources, for example, to relate a record to contextual data or to link two client databases to obtain meaningful data, the next chapter will present how to merge different datasets with `Pandas`. By the end of the next chapter, thanks to data merging, we will have a detailed database on the carbon footprints of the French[^empreinte].
137+
As datasets generally gain value by associating multiple sources, for example, to relate a record to contextual data or to link two client databases to obtain meaningful data, the next chapter will present how to merge different datasets with `Pandas`. By the end of the next chapter, thanks to data merging, we will have a detailed database on the carbon footprints of the French[^empreinte-en].
138138

139-
[^empreinte]: Actually, it is not the carbon footprint but the __national inventory__ since the database corresponds to a production view, not consumption. Emissions made in one municipality to satisfy the consumption of another will be attributed to the former where the carbon footprint concept would attribute it to the latter. Moreover, the emissions presented here do not include those produced by goods made abroad. This exercise is not about constructing a reliable statistic but rather understanding the logic of data merging to construct descriptive statistics.
139+
[^empreinte-en]: Actually, it is not the carbon footprint but the __national inventory__ since the database corresponds to a production view, not consumption. Emissions made in one municipality to satisfy the consumption of another will be attributed to the former where the carbon footprint concept would attribute it to the latter. Moreover, the emissions presented here do not include those produced by goods made abroad. This exercise is not about constructing a reliable statistic but rather understanding the logic of data merging to construct descriptive statistics.
140140

141141
:::
142142

@@ -285,9 +285,9 @@ Les types les plus simples (`int` ou `float`) correspondent aux valeurs numériq
285285
::: {.content-visible when-profile="en"}
286286
## Before seeing `DataFrame`, we need to know `Pandas` `Series`
287287

288-
In fact, a _DataFrame_ is a collection of objects called `pandas.Series`. These `Series` are one-dimensional objects that are extensions of the one-dimensional `Numpy` arrays[^numpyarrow]. In particular, to facilitate the handling of categorical or temporal data, additional variable types are available in `Pandas` compared to `Numpy` (`categorical`, `datetime64`, and `timedelta64`). These types are associated with optimized methods to facilitate the processing of this data.
288+
In fact, a _DataFrame_ is a collection of objects called `pandas.Series`. These `Series` are one-dimensional objects that are extensions of the one-dimensional `Numpy` arrays[^numpyarrow-en]. In particular, to facilitate the handling of categorical or temporal data, additional variable types are available in `Pandas` compared to `Numpy` (`categorical`, `datetime64`, and `timedelta64`). These types are associated with optimized methods to facilitate the processing of this data.
289289

290-
[^numpyarrow]: The original goal of `Pandas` is to provide a high-level library for more abstract low-level layers, such as `Numpy` arrays. `Pandas` is gradually changing these low-level layers to favor `Arrow` over `Numpy` without destabilizing the high-level commands familiar to `Pandas` users. This shift is due to the fact that `Arrow`, a low-level computation library, is more powerful and flexible than `Numpy`. For example, `Numpy` offers limited textual types, whereas `Arrow` provides greater freedom.
290+
[^numpyarrow-en]: The original goal of `Pandas` is to provide a high-level library for more abstract low-level layers, such as `Numpy` arrays. `Pandas` is gradually changing these low-level layers to favor `Arrow` over `Numpy` without destabilizing the high-level commands familiar to `Pandas` users. This shift is due to the fact that `Arrow`, a low-level computation library, is more powerful and flexible than `Numpy`. For example, `Numpy` offers limited textual types, whereas `Arrow` provides greater freedom.
291291

292292
There are several possible types for a `pandas.Series`, extending the basic data types in `Python`, which will determine the behavior of this variable. Indeed, many operations do not have the same meaning depending on whether the value is numeric or not.
293293

content/manipulation/02_pandas_intro/_exo1.qmd

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
::: {.content-visible when-profile="fr"}
22

3-
:::: {.callout-tip}
3+
:::: {.cell .markdown}
4+
::::: {.callout-tip}
45
## Exercice 1: Importer un CSV et explorer la structure de données
56

67
1. Importer les données de l'Ademe à l'aide du package `Pandas` et de la commande consacrée pour l'import de csv. Nommer le `DataFrame` obtenu `emissions`[^nomdf-fr].
@@ -21,7 +22,7 @@ en ligne pour découvrir cette fonction.
2122
</details>
2223

2324

24-
::::
25+
:::::
2526

2627
[^nomdf-fr]: Par manque d'imagination, on est souvent tenté d'appeler notre
2728
_dataframe_ principal `df` ou `data`. C'est souvent une mauvaise idée puisque
@@ -30,12 +31,15 @@ plus tard. L'autodocumentation, approche qui consiste à avoir un code
3031
qui se comprend de lui-même, est une bonne pratique et il est donc recommandé
3132
de donner un nom simple mais efficace pour connaître la nature du _dataset_ en question.
3233

34+
::::
35+
3336
:::
3437

3538

3639
::: {.content-visible when-profile="en"}
3740

38-
:::: {.callout-tip}
41+
:::: {.cell .markdown}
42+
::::: {.callout-tip}
3943
## Exercise 1: Importing a CSV and Exploring Data Structure
4044

4145
1. Import the data from Ademe using the `Pandas` package and the dedicated command for importing CSVs. Name the obtained `DataFrame` `emissions`[^nomdf-en].
@@ -58,4 +62,6 @@ Read the documentation for `read_csv` (very well done) or look for examples onli
5862

5963
[^nomdf-en]: Due to a lack of imagination, we are often tempted to call our main _dataframe_ `df` or `data`. This is often a bad idea because the name is not very informative when you read the code a few weeks later. Self-documenting code, an approach that consists of having code that is self-explanatory, is a good practice, and it is recommended to give a simple yet effective name to know the nature of the dataset in question.
6064

65+
::::
66+
6167
:::

0 commit comments

Comments
 (0)