From e0d9d7861878747e526e1c7074ddbdfbe5967665 Mon Sep 17 00:00:00 2001 From: Alan Crosswell Date: Fri, 13 Jan 2023 16:11:02 -0500 Subject: [PATCH 1/3] replace `str.strip()` with `str.replace()` `str.strip()` actually removes each character in the character class string. So `.strip('gpdaPerc_')` would also do the same thing and is confusing. I made a live typo during the workshop and a student asked me how was it that it still worked when I had add `gdp_Percap_` even though there's no `_` between `gdp` and `Per`! Reference: https://pandas.pydata.org/docs/reference/api/pandas.Series.str.strip.html and https://pandas.pydata.org/docs/reference/api/pandas.Series.str.replace.html --- _episodes/09-plotting.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_episodes/09-plotting.md b/_episodes/09-plotting.md index 219d6cb95..33b6b843b 100644 --- a/_episodes/09-plotting.md +++ b/_episodes/09-plotting.md @@ -71,10 +71,10 @@ data = pd.read_csv('data/gapminder_gdp_oceania.csv', index_col='country') # Extract year from last 4 characters of each column name # The current column names are structured as 'gdpPercap_(year)', # so we want to keep the (year) part only for clarity when plotting GDP vs. years -# To do this we use strip(), which removes from the string the characters stated in the argument -# This method works on strings, so we call str before strip() +# To do this we use replace(), which removes from the string the characters stated in the argument +# This method works on strings, so we call str before replace() -years = data.columns.str.strip('gdpPercap_') +years = data.columns.str.replace('gdpPercap_', '') # Convert year values to integers, saving results back to dataframe From 36eb551196bda6b50de9994d7e6ac624e71e2eac Mon Sep 17 00:00:00 2001 From: Alan Crosswell Date: Wed, 18 Jan 2023 10:04:00 -0500 Subject: [PATCH 2/3] Updates per @alee review. 
--- _episodes/09-plotting.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/_episodes/09-plotting.md b/_episodes/09-plotting.md index 33b6b843b..97ddf52f7 100644 --- a/_episodes/09-plotting.md +++ b/_episodes/09-plotting.md @@ -61,7 +61,10 @@ plt.ylabel('Position (km)') * We can also plot [Pandas dataframes](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html). * This implicitly uses [`matplotlib.pyplot`](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.html#module-matplotlib.pyplot). -* Before plotting, we convert the column headings from a `string` to `integer` data type, since they represent numerical values +* Before plotting, we convert the column headings from a `string` to `integer` data type, since they represent numerical values, + using [str.replace()](https://pandas.pydata.org/docs/reference/api/pandas.Series.str.replace.html) to remove the `gdpPercap_` + prefix and then [astype(int)](https://pandas.pydata.org/docs/reference/api/pandas.Series.astype.html) + to convert the series of string values (`['1952', '1957', ..., '2007']`) to a series of integers: `[1952, 1957, ..., 2007]`. ~~~ import pandas as pd @@ -72,7 +75,7 @@ data = pd.read_csv('data/gapminder_gdp_oceania.csv', index_col='country') # The current column names are structured as 'gdpPercap_(year)', # so we want to keep the (year) part only for clarity when plotting GDP vs. 
years # To do this we use replace(), which removes from the string the characters stated in the argument -# This method works on strings, so we call str before replace() +# This method works on strings, so we access the str attribute before replace() years = data.columns.str.replace('gdpPercap_', '') From 0030dbed88f045f17b6ad5ae0fd5a19fa39fafba Mon Sep 17 00:00:00 2001 From: Allen Lee Date: Sat, 21 Jan 2023 07:22:46 -0700 Subject: [PATCH 3/3] minor wording changes --- _episodes/09-plotting.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_episodes/09-plotting.md b/_episodes/09-plotting.md index 97ddf52f7..a72b7dbeb 100644 --- a/_episodes/09-plotting.md +++ b/_episodes/09-plotting.md @@ -75,7 +75,7 @@ data = pd.read_csv('data/gapminder_gdp_oceania.csv', index_col='country') # The current column names are structured as 'gdpPercap_(year)', # so we want to keep the (year) part only for clarity when plotting GDP vs. years # To do this we use replace(), which removes from the string the characters stated in the argument -# This method works on strings, so we access the str attribute before replace() +# This method works on strings, so we use replace() from Pandas Series.str vectorized string functions years = data.columns.str.replace('gdpPercap_', '')