Skip to content

Commit 6d37e77

Browse files
authored
Correction des problèmes du notebook Pandas (#653)
* premiers pb * Ajoute shortcode
1 parent 378b872 commit 6d37e77

File tree

6 files changed

+74
-94
lines changed

6 files changed

+74
-94
lines changed
Lines changed: 26 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,30 @@
1-
-- Liste des types de callouts et leur classe associée
2-
local callouts_all = {
3-
caution = "callout-caution",
4-
important = "callout-important",
5-
tip = "callout-tip",
6-
note = "callout-note",
7-
warning = "callout-warning",
8-
exercise = "callout-exercise"
9-
}
10-
11-
-- Fonction pour déterminer le type d’un callout à partir de ses classes
12-
function get_callout_type(div)
13-
for key, class in pairs(callouts_all) do
14-
if div.classes:includes(class) then
15-
return key
16-
end
17-
end
18-
return nil
19-
end
20-
21-
-- Applique le collapse si défini dans les métadonnées
22-
function applyCollapse(div, type, collapse_meta)
23-
if collapse_meta == nil then return div end
24-
25-
local collapse_val = nil
26-
if collapse_meta.all ~= nil then
27-
collapse_val = collapse_meta.all
28-
elseif collapse_meta[type] ~= nil then
29-
collapse_val = collapse_meta[type]
30-
end
31-
32-
if collapse_val ~= nil then
33-
div.attributes["collapse"] = tostring(collapse_val)
34-
end
35-
36-
return div
37-
end
38-
39-
-- Création du callout stylisé
40-
function createCallout(div, type, collapse_meta)
41-
local title = type:gsub("^%l", string.upper)
42-
43-
if div.content[1] and div.content[1].t == "Header" then
44-
title = pandoc.utils.stringify(div.content[1])
45-
div.content:remove(1)
46-
end
47-
48-
-- Appliquer collapse si spécifié
49-
div = applyCollapse(div, type, collapse_meta)
50-
51-
if quarto.doc.is_format("ipynb") then
52-
if type == "exercise" then
53-
type = "tip"
1+
-- filters/warninglang.lua
2+
function Div(el)
3+
-- On ne traite que les divs avec classe "warninglang"
4+
if el.classes:includes("warninglang") then
5+
local filename_relative = quarto.doc.input_file
6+
local html_text
7+
8+
if filename_relative:find("/en/") then
9+
-- version anglaise → lien vers la française
10+
local link = filename_relative:gsub("/en/", "/")
11+
html_text =
12+
"This is the English 🇬🇧 🇺🇸 version of this chapter, " ..
13+
"to see the French version go " ..
14+
"<a href=\"" .. link .. "\">there</a>."
15+
else
16+
-- version française → lien vers l’anglaise
17+
local link = filename_relative:gsub("/content/", "/en/content/")
18+
html_text =
19+
"Ceci est la version française 🇫🇷 de ce chapitre, " ..
20+
"pour voir la version anglaise allez " ..
21+
"<a href=\"" .. link .. "\">ici</a>."
5422
end
5523

56-
local html_start = "<div class=\"callout callout-style-default callout-" .. type .. " callout-titled\">\n" ..
57-
"<div class=\"callout-header d-flex align-content-center\">\n" ..
58-
"<div class=\"callout-icon-container\">\n<i class=\"callout-icon\"></i>\n</div>\n" ..
59-
"<div class=\"callout-title-container flex-fill\">\n" .. title .. "\n</div>\n</div>\n" ..
60-
"<div class=\"callout-body-container callout-body\">\n"
61-
62-
local html_end = "</div>\n</div>"
63-
64-
div.content:insert(1, pandoc.RawBlock("html", html_start))
65-
div.content:insert(pandoc.RawBlock("html", html_end))
66-
67-
return div
68-
else
69-
return quarto.Callout({
70-
type = type,
71-
title = title,
72-
content = div.content,
73-
icon = div.attributes["icon"],
74-
collapse = div.attributes["collapse"] == "true"
75-
})
76-
end
77-
end
78-
79-
-- Hook principal
80-
function Div(div)
81-
local callout_type = get_callout_type(div)
82-
if callout_type ~= nil then
83-
local collapse_meta = quarto.doc and quarto.doc.metadata and quarto.doc.metadata["collapse-callout"]
84-
return createCallout(div, callout_type, collapse_meta)
24+
-- Retourne un bloc callout-note contenant ce texte
25+
return pandoc.Div(
26+
{ pandoc.Para{ pandoc.RawInline("html", html_text) } },
27+
pandoc.Attr("", { "callout-note" })
28+
)
8529
end
8630
end
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
-- filters/warninglang.lua
-- Pandoc/Quarto filter: replaces any div carrying the "warninglang" class
-- with a note callout linking to the other-language version of the chapter.

-- Base URL of the published site, prepended to the computed page path.
local SITE_ROOT = "https://pythonds.linogaliana.fr/"

-- Build the cross-language HTML notice for the given input path.
-- Paths containing "/en/" are treated as the English version and link back to
-- the French page; anything else is assumed French and links to English.
local function cross_language_notice(path)
  if path:find("/en/") then
    local target = path:gsub("/en/", "/")
    return "This is the English 🇬🇧 🇺🇸 version of this chapter, "
      .. "to see the French version go "
      .. '<a href="' .. SITE_ROOT .. target .. '">there</a>.'
  end
  local target = path:gsub("/content/", "/en/content/")
  return "Ceci est la version française 🇫🇷 de ce chapitre, "
    .. "pour voir la version anglaise allez "
    .. '<a href="' .. SITE_ROOT .. target .. '">ici</a>.'
end

-- Entry point invoked by Pandoc for every Div element.
-- Returns a replacement Div for "warninglang" divs, nil (keep as-is) otherwise.
function Div(el)
  if not el.classes:includes("warninglang") then
    return nil
  end

  -- NOTE(review): assumes quarto.doc.input_file is a path relative to the
  -- project root; if it is an absolute filesystem path the generated URL
  -- would be wrong — confirm against the Quarto Lua API.
  local notice = cross_language_notice(quarto.doc.input_file)

  -- Re-wrap the notice in a callout-note div so it renders as a note box.
  return pandoc.Div(
    { pandoc.Para{ pandoc.RawInline("html", notice) } },
    pandoc.Attr("", { "callout", "callout-note" })
  )
end

_quarto.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ project:
44
- index.qmd
55
- 404.qmd
66
- content/getting-started/index.qmd
7-
- content/manipulation/04a_webscraping_TP.qmd
7+
- content/manipulation/01_numpy.qmd
88
- content/modelisation/index.qmd
99
- content/visualisation/index.qmd
1010
- content/annexes/evaluation.qmd
@@ -78,6 +78,8 @@ filters:
7878
path: _extensions/linogaliana/lang-switch/lang-notebook.lua
7979
- at: pre-ast
8080
path: _extensions/linogaliana/callout/callout-notebook.lua
81+
- at: pre-ast
82+
path: _extensions/linogaliana/lang-switch/warninglang.lua
8183
- include-code-files
8284
- build/replace-title.lua
8385
- _extensions/linogaliana/details-iframe/details.lua

content/manipulation/01_numpy.qmd

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ echo: false
1414
printMessage="true"
1515
>}}
1616
17-
::: {.content-visible when-format="ipynb"}
18-
{{warninglang}}
17+
:::: {.content-visible when-format="ipynb"}
18+
::: {.warninglang .callout-warning}
1919
:::
20+
::::
2021

2122
# Introduction
2223

content/manipulation/02_pandas_intro.qmd

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,15 +128,15 @@ l'association de données pour construire des statistiques descriptives.
128128

129129
The `Pandas` package has been the central piece of the data science ecosystem for about a decade. The _DataFrame_, a central object in languages like `R` or `Stata`, had long been absent in the `Python` ecosystem. Yet, thanks to `Numpy`, all the basic components were present but needed to be reconfigured to meet the needs of data scientists.
130130

131-
Wes McKinney, when he built `Pandas` to provide a dataframe leveraging the numerical computation library `Numpy` in the background, enabled a significant leap forward for `Python` in data analysis, explaining its popularity in the data science ecosystem. `Pandas` is not without limitations[^tidyverse], which we will have the opportunity to discuss, but the vast array of analysis methods it offers greatly simplifies data analysis work. For more information on this package, the reference book by @mckinney2012python presents many of the package's features.
131+
Wes McKinney, when he built `Pandas` to provide a dataframe leveraging the numerical computation library `Numpy` in the background, enabled a significant leap forward for `Python` in data analysis, explaining its popularity in the data science ecosystem. `Pandas` is not without limitations[^tidyverse-en], which we will have the opportunity to discuss, but the vast array of analysis methods it offers greatly simplifies data analysis work. For more information on this package, the reference book by @mckinney2012python presents many of the package's features.
132132

133-
[^tidyverse]: The equivalent ecosystem in `R`, the [`tidyverse`](https://www.tidyverse.org/), developed by _Posit_, is of more recent design than `Pandas`. Its philosophy could thus draw inspiration from `Pandas` while addressing some limitations of the `Pandas` syntax. Since both syntaxes are an implementation in `Python` or `R` of the `SQL` philosophy, it is natural that they resemble each other and that it is pertinent for data scientists to know both languages.
133+
[^tidyverse-en]: The equivalent ecosystem in `R`, the [`tidyverse`](https://www.tidyverse.org/), developed by _Posit_, is of more recent design than `Pandas`. Its philosophy could thus draw inspiration from `Pandas` while addressing some limitations of the `Pandas` syntax. Since both syntaxes are an implementation in `Python` or `R` of the `SQL` philosophy, it is natural that they resemble each other and that it is pertinent for data scientists to know both languages.
134134

135135
In this chapter, we will focus on the most relevant elements in the context of an introduction to data science, leaving interested users to deepen their knowledge with the abundant resources available on the subject.
136136

137-
As datasets generally gain value by associating multiple sources, for example, to relate a record to contextual data or to link two client databases to obtain meaningful data, the next chapter will present how to merge different datasets with `Pandas`. By the end of the next chapter, thanks to data merging, we will have a detailed database on the carbon footprints of the French[^empreinte].
137+
As datasets generally gain value by associating multiple sources, for example, to relate a record to contextual data or to link two client databases to obtain meaningful data, the next chapter will present how to merge different datasets with `Pandas`. By the end of the next chapter, thanks to data merging, we will have a detailed database on the carbon footprints of the French[^empreinte-en].
138138

139-
[^empreinte]: Actually, it is not the carbon footprint but the __national inventory__ since the database corresponds to a production view, not consumption. Emissions made in one municipality to satisfy the consumption of another will be attributed to the former where the carbon footprint concept would attribute it to the latter. Moreover, the emissions presented here do not include those produced by goods made abroad. This exercise is not about constructing a reliable statistic but rather understanding the logic of data merging to construct descriptive statistics.
139+
[^empreinte-en]: Actually, it is not the carbon footprint but the __national inventory__ since the database corresponds to a production view, not consumption. Emissions made in one municipality to satisfy the consumption of another will be attributed to the former where the carbon footprint concept would attribute it to the latter. Moreover, the emissions presented here do not include those produced by goods made abroad. This exercise is not about constructing a reliable statistic but rather understanding the logic of data merging to construct descriptive statistics.
140140

141141
:::
142142

@@ -285,9 +285,9 @@ Les types les plus simples (`int` ou `float`) correspondent aux valeurs numériq
285285
::: {.content-visible when-profile="en"}
286286
## Before seeing `DataFrame`, we need to know `Pandas` `Series`
287287

288-
In fact, a _DataFrame_ is a collection of objects called `pandas.Series`. These `Series` are one-dimensional objects that are extensions of the one-dimensional `Numpy` arrays[^numpyarrow]. In particular, to facilitate the handling of categorical or temporal data, additional variable types are available in `Pandas` compared to `Numpy` (`categorical`, `datetime64`, and `timedelta64`). These types are associated with optimized methods to facilitate the processing of this data.
288+
In fact, a _DataFrame_ is a collection of objects called `pandas.Series`. These `Series` are one-dimensional objects that are extensions of the one-dimensional `Numpy` arrays[^numpyarrow-en]. In particular, to facilitate the handling of categorical or temporal data, additional variable types are available in `Pandas` compared to `Numpy` (`categorical`, `datetime64`, and `timedelta64`). These types are associated with optimized methods to facilitate the processing of this data.
289289

290-
[^numpyarrow]: The original goal of `Pandas` is to provide a high-level library for more abstract low-level layers, such as `Numpy` arrays. `Pandas` is gradually changing these low-level layers to favor `Arrow` over `Numpy` without destabilizing the high-level commands familiar to `Pandas` users. This shift is due to the fact that `Arrow`, a low-level computation library, is more powerful and flexible than `Numpy`. For example, `Numpy` offers limited textual types, whereas `Arrow` provides greater freedom.
290+
[^numpyarrow-en]: The original goal of `Pandas` is to provide a high-level library for more abstract low-level layers, such as `Numpy` arrays. `Pandas` is gradually changing these low-level layers to favor `Arrow` over `Numpy` without destabilizing the high-level commands familiar to `Pandas` users. This shift is due to the fact that `Arrow`, a low-level computation library, is more powerful and flexible than `Numpy`. For example, `Numpy` offers limited textual types, whereas `Arrow` provides greater freedom.
291291

292292
There are several possible types for a `pandas.Series`, extending the basic data types in `Python`, which will determine the behavior of this variable. Indeed, many operations do not have the same meaning depending on whether the value is numeric or not.
293293

content/manipulation/02_pandas_intro/_exo1.qmd

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
::: {.content-visible when-profile="fr"}
22

3-
:::: {.callout-tip}
3+
:::: {.cell .markdown}
4+
::::: {.callout-tip}
45
## Exercice 1: Importer un CSV et explorer la structure de données
56

67
1. Importer les données de l'Ademe à l'aide du package `Pandas` et de la commande consacrée pour l'import de csv. Nommer le `DataFrame` obtenu `emissions`[^nomdf-fr].
@@ -21,7 +22,7 @@ en ligne pour découvrir cette fonction.
2122
</details>
2223

2324

24-
::::
25+
:::::
2526

2627
[^nomdf-fr]: Par manque d'imagination, on est souvent tenté d'appeler notre
2728
_dataframe_ principal `df` ou `data`. C'est souvent une mauvaise idée puisque
@@ -30,12 +31,15 @@ plus tard. L'autodocumentation, approche qui consiste à avoir un code
3031
qui se comprend de lui-même, est une bonne pratique et il est donc recommandé
3132
de donner un nom simple mais efficace pour connaître la nature du _dataset_ en question.
3233

34+
::::
35+
3336
:::
3437

3538

3639
::: {.content-visible when-profile="en"}
3740

38-
:::: {.callout-tip}
41+
:::: {.cell .markdown}
42+
::::: {.callout-tip}
3943
## Exercise 1: Importing a CSV and Exploring Data Structure
4044

4145
1. Import the data from Ademe using the `Pandas` package and the dedicated command for importing CSVs. Name the obtained `DataFrame` `emissions`[^nomdf-en].
@@ -58,4 +62,6 @@ Read the documentation for `read_csv` (very well done) or look for examples onli
5862

5963
[^nomdf-en]: Due to a lack of imagination, we are often tempted to call our main _dataframe_ `df` or `data`. This is often a bad idea because the name is not very informative when you read the code a few weeks later. Self-documenting code, an approach that consists of having code that is self-explanatory, is a good practice, and it is recommended to give a simple yet effective name to know the nature of the dataset in question.
6064

65+
::::
66+
6167
:::

0 commit comments

Comments
 (0)