From e0d9d7861878747e526e1c7074ddbdfbe5967665 Mon Sep 17 00:00:00 2001
From: Alan Crosswell <alan@columbia.edu>
Date: Fri, 13 Jan 2023 16:11:02 -0500
Subject: [PATCH 1/3] replace `str.strip()` with `str.replace()`

`str.strip()` actually removes each character in the character class string. So `.strip('gpdaPerc_')` would also do the same thing and is confusing.
I made a live typo during the workshop, and a student asked me how it still worked when I had added `gdp_Percap_`, even though there's no `_` between `gdp` and `Per`!

Reference: https://pandas.pydata.org/docs/reference/api/pandas.Series.str.strip.html and https://pandas.pydata.org/docs/reference/api/pandas.Series.str.replace.html
---
 _episodes/09-plotting.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/_episodes/09-plotting.md b/_episodes/09-plotting.md
index 219d6cb95..33b6b843b 100644
--- a/_episodes/09-plotting.md
+++ b/_episodes/09-plotting.md
@@ -71,10 +71,10 @@ data = pd.read_csv('data/gapminder_gdp_oceania.csv', index_col='country')
 # Extract year from last 4 characters of each column name
 # The current column names are structured as 'gdpPercap_(year)', 
 # so we want to keep the (year) part only for clarity when plotting GDP vs. years
-# To do this we use strip(), which removes from the string the characters stated in the argument
-# This method works on strings, so we call str before strip()
+# To do this we use replace(), which removes from the string the characters stated in the argument
+# This method works on strings, so we call str before replace()
 
-years = data.columns.str.strip('gdpPercap_')
+years = data.columns.str.replace('gdpPercap_', '')
 
 # Convert year values to integers, saving results back to dataframe
 

From 36eb551196bda6b50de9994d7e6ac624e71e2eac Mon Sep 17 00:00:00 2001
From: Alan Crosswell <alan@columbia.edu>
Date: Wed, 18 Jan 2023 10:04:00 -0500
Subject: [PATCH 2/3] Updates per @alee review.

---
 _episodes/09-plotting.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/_episodes/09-plotting.md b/_episodes/09-plotting.md
index 33b6b843b..97ddf52f7 100644
--- a/_episodes/09-plotting.md
+++ b/_episodes/09-plotting.md
@@ -61,7 +61,10 @@ plt.ylabel('Position (km)')
 
 *   We can also plot [Pandas dataframes](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html).
 *   This implicitly uses [`matplotlib.pyplot`](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.html#module-matplotlib.pyplot).
-*   Before plotting, we convert the column headings from a `string` to `integer` data type, since they represent numerical values
+*   Before plotting, we convert the column headings from a `string` to `integer` data type, since they represent numerical values,
+    using [str.replace()](https://pandas.pydata.org/docs/reference/api/pandas.Series.str.replace.html) to remove the `gdpPercap_`
+    prefix and then [astype(int)](https://pandas.pydata.org/docs/reference/api/pandas.Series.astype.html)
+    to convert the series of string values (`['1952', '1957', ..., '2007']`) to a series of integers: `[1952, 1957, ..., 2007]`.
 
 ~~~
 import pandas as pd
@@ -72,7 +75,7 @@ data = pd.read_csv('data/gapminder_gdp_oceania.csv', index_col='country')
 # The current column names are structured as 'gdpPercap_(year)', 
 # so we want to keep the (year) part only for clarity when plotting GDP vs. years
 # To do this we use replace(), which removes from the string the characters stated in the argument
-# This method works on strings, so we call str before replace()
+# This method works on strings, so we access the str attribute before replace()
 
 years = data.columns.str.replace('gdpPercap_', '')
 

From 0030dbed88f045f17b6ad5ae0fd5a19fa39fafba Mon Sep 17 00:00:00 2001
From: Allen Lee <alee@users.noreply.github.com>
Date: Sat, 21 Jan 2023 07:22:46 -0700
Subject: [PATCH 3/3] minor wording changes

---
 _episodes/09-plotting.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_episodes/09-plotting.md b/_episodes/09-plotting.md
index 97ddf52f7..a72b7dbeb 100644
--- a/_episodes/09-plotting.md
+++ b/_episodes/09-plotting.md
@@ -75,7 +75,7 @@ data = pd.read_csv('data/gapminder_gdp_oceania.csv', index_col='country')
 # The current column names are structured as 'gdpPercap_(year)', 
 # so we want to keep the (year) part only for clarity when plotting GDP vs. years
 # To do this we use replace(), which removes from the string the characters stated in the argument
-# This method works on strings, so we access the str attribute before replace()
+# This method works on strings, so we use replace() from Pandas Series.str vectorized string functions
 
 years = data.columns.str.replace('gdpPercap_', '')