# Chapter 3: Series methods

## 3.1 Importing a data set with the read_csv function

In [1]:
import pandas as pd

In [2]:
# The two calls below are equivalent: the path may be passed by keyword
# (filepath_or_buffer) or positionally. Only the last expression in a cell
# is rendered as output.
pd.read_csv(filepath_or_buffer="data/ch03/pokemon.csv")
pd.read_csv("data/ch03/pokemon.csv")

Unnamed: 0,Pokemon,Type
0,Bulbasaur,Grass / Poison
1,Ivysaur,Grass / Poison
2,Venusaur,Grass / Poison
3,Charmander,Fire
4,Charmeleon,Fire
...,...,...
804,Stakataka,Rock / Steel
805,Blacephalon,Fire / Ghost
806,Zeraora,Electric
807,Meltan,Steel


In [3]:
pd.read_csv("data/ch03/pokemon.csv", index_col="Pokemon")

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Grass / Poison
Ivysaur,Grass / Poison
Venusaur,Grass / Poison
Charmander,Fire
Charmeleon,Fire
...,...
Stakataka,Rock / Steel
Blacephalon,Fire / Ghost
Zeraora,Electric
Meltan,Steel


In [4]:
pd.read_csv("data/ch03/pokemon.csv", index_col="Pokemon").squeeze()

Pokemon
Bulbasaur      Grass / Poison
Ivysaur        Grass / Poison
Venusaur       Grass / Poison
Charmander               Fire
Charmeleon               Fire
                    ...      
Stakataka        Rock / Steel
Blacephalon      Fire / Ghost
Zeraora              Electric
Meltan                  Steel
Melmetal                Steel
Name: Type, Length: 809, dtype: object

In [5]:
# Load the Pokemon data with names as index labels; squeeze() collapses the
# single remaining column into a Series.
pokemon = pd.read_csv(
    "data/ch03/pokemon.csv",
    index_col="Pokemon",
).squeeze()

In [6]:
pd.read_csv("data/ch03/google_stocks.csv").head()

Unnamed: 0,Date,Close
0,2004-08-19,49.98
1,2004-08-20,53.95
2,2004-08-23,54.5
3,2004-08-24,52.24
4,2004-08-25,52.8


In [7]:
pd.read_csv("data/ch03/google_stocks.csv", parse_dates=["Date"]).head()

Unnamed: 0,Date,Close
0,2004-08-19,49.98
1,2004-08-20,53.95
2,2004-08-23,54.5
3,2004-08-24,52.24
4,2004-08-25,52.8


In [8]:
# Preview the closing prices as a Series indexed by the parsed dates.
pd.read_csv(
    "data/ch03/google_stocks.csv",
    index_col="Date",
    parse_dates=["Date"],
).squeeze().head()

Date
2004-08-19    49.98
2004-08-20    53.95
2004-08-23    54.50
2004-08-24    52.24
2004-08-25    52.80
Name: Close, dtype: float64

In [9]:
# Closing prices as a Series: "Date" is parsed to datetimes and used as the
# index, and squeeze() collapses the single remaining column.
google = pd.read_csv(
    "data/ch03/google_stocks.csv",
    parse_dates=["Date"],
    index_col="Date",
).squeeze()

In [10]:
pd.read_csv("data/ch03/revolutionary_war.csv").tail()

Unnamed: 0,Battle,Start Date,State
227,Siege of Fort Henry,9/11/1782,Virginia
228,Grand Assault on Gibraltar,9/13/1782,
229,Action of 18 October 1782,10/18/1782,
230,Action of 6 December 1782,12/6/1782,
231,Action of 22 January 1783,1/22/1783,Virginia


In [11]:
# Parse "Start Date" as datetimes and use it as the index; show the last rows.
pd.read_csv(
    "data/ch03/revolutionary_war.csv",
    parse_dates=["Start Date"],
    index_col="Start Date",
).tail()

Unnamed: 0_level_0,Battle,State
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1782-09-11,Siege of Fort Henry,Virginia
1782-09-13,Grand Assault on Gibraltar,
1782-10-18,Action of 18 October 1782,
1782-12-06,Action of 6 December 1782,
1783-01-22,Action of 22 January 1783,Virginia


In [12]:
pd.read_csv(
    "data/ch03/revolutionary_war.csv",
    index_col="Start Date",
    parse_dates=["Start Date"],
    usecols=["State", "Start Date"]
).squeeze().tail()

Start Date
1782-09-11    Virginia
1782-09-13         NaN
1782-10-18         NaN
1782-12-06         NaN
1783-01-22    Virginia
Name: State, dtype: object

In [13]:
# Keep only two columns: "Start Date" (parsed, used as the index) and "State"
# (the values). With a single data column left, squeeze() yields a Series.
battles = pd.read_csv(
    "data/ch03/revolutionary_war.csv",
    index_col="Start Date",
    parse_dates=["Start Date"],
    usecols=["State", "Start Date"]
).squeeze()

## 3.2 Sorting a Series

### 3.2.1 Sorting by values with the sort_values method

In [14]:
google.sort_values()

Date
2004-09-03      49.82
2004-09-01      49.94
2004-08-19      49.98
2004-09-02      50.57
2004-09-07      50.60
               ...   
2019-04-23    1264.55
2019-10-25    1265.13
2018-07-26    1268.33
2019-04-26    1272.18
2019-04-29    1287.58
Name: Close, Length: 3824, dtype: float64

In [15]:
pokemon.sort_values()

Pokemon
Illumise                Bug
Silcoon                 Bug
Pinsir                  Bug
Burmy                   Bug
Wurmple                 Bug
                  ...      
Tirtouga       Water / Rock
Relicanth      Water / Rock
Corsola        Water / Rock
Carracosta     Water / Rock
Empoleon      Water / Steel
Name: Type, Length: 809, dtype: object

In [16]:
pd.Series(data=["Adam", "adam", "Ben"]).sort_values()

0    Adam
2     Ben
1    adam
dtype: object

In [17]:
google.sort_values(ascending=False).head()

Date
2019-04-29    1287.58
2019-04-26    1272.18
2018-07-26    1268.33
2019-10-25    1265.13
2019-04-23    1264.55
Name: Close, dtype: float64

In [18]:
pokemon.sort_values(ascending=False).head()

Pokemon
Empoleon      Water / Steel
Corsola        Water / Rock
Relicanth      Water / Rock
Carracosta     Water / Rock
Tirtouga       Water / Rock
Name: Type, dtype: object

In [19]:
# The two calls below are equivalent: na_position="last" is the default,
# so missing values (NaN) sort to the end either way.
battles.sort_values()
battles.sort_values(na_position="last")

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
1777-09-03       Delaware
1777-05-17        Florida
                 ...     
1782-08-08            NaN
1782-08-25            NaN
1782-09-13            NaN
1782-10-18            NaN
1782-12-06            NaN
Name: State, Length: 232, dtype: object

In [20]:
battles.sort_values(na_position="first")

Start Date
1775-09-17         NaN
1775-12-31         NaN
1776-03-03         NaN
1776-03-25         NaN
1776-05-18         NaN
                ...   
1781-07-06    Virginia
1781-07-01    Virginia
1781-06-26    Virginia
1781-04-25    Virginia
1783-01-22    Virginia
Name: State, Length: 232, dtype: object

In [21]:
battles.dropna().sort_values()

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
1777-09-03       Delaware
1777-05-17        Florida
                 ...     
1781-07-06       Virginia
1781-07-01       Virginia
1781-06-26       Virginia
1781-04-25       Virginia
1783-01-22       Virginia
Name: State, Length: 162, dtype: object

### 3.2.2 Sorting by index with the sort_index method

In [22]:
# The two calls below are equivalent: ascending=True is the default.
pokemon.sort_index()
pokemon.sort_index(ascending=True)

Pokemon
Abomasnow        Grass / Ice
Abra                 Psychic
Absol                   Dark
Accelgor                 Bug
Aegislash      Steel / Ghost
                  ...       
Zoroark                 Dark
Zorua                   Dark
Zubat        Poison / Flying
Zweilous       Dark / Dragon
Zygarde      Dragon / Ground
Name: Type, Length: 809, dtype: object

In [23]:
battles.sort_index()

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1783-01-22         Virginia
NaT              New Jersey
NaT                Virginia
NaT                     NaN
NaT                     NaN
Name: State, Length: 232, dtype: object

In [24]:
battles.sort_index(na_position="first").head()

Start Date
NaT              New Jersey
NaT                Virginia
NaT                     NaN
NaT                     NaN
1774-09-01    Massachusetts
Name: State, dtype: object

In [25]:
battles.sort_index(ascending=False).head()

Start Date
1783-01-22    Virginia
1782-12-06         NaN
1782-10-18         NaN
1782-09-13         NaN
1782-09-11    Virginia
Name: State, dtype: object

### 3.2.3 Retrieving the smallest and largest values with the nsmallest and nlargest methods

In [26]:
google.sort_values(ascending=False).head()

Date
2019-04-29    1287.58
2019-04-26    1272.18
2018-07-26    1268.33
2019-10-25    1265.13
2019-04-23    1264.55
Name: Close, dtype: float64

In [27]:
# The two calls below are equivalent: n defaults to 5.
google.nlargest(n=5)
google.nlargest()

Date
2019-04-29    1287.58
2019-04-26    1272.18
2018-07-26    1268.33
2019-10-25    1265.13
2019-04-23    1264.55
Name: Close, dtype: float64

In [28]:
# The two calls below are equivalent: n defaults to 5.
google.nsmallest(n=5)
google.nsmallest()

Date
2004-09-03    49.82
2004-09-01    49.94
2004-08-19    49.98
2004-09-02    50.57
2004-09-07    50.60
Name: Close, dtype: float64

## 3.3 Overwriting a Series with the inplace parameter

In [29]:
battles.head(3)

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
Name: State, dtype: object

In [30]:
battles.sort_values().head(3)

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
Name: State, dtype: object

In [31]:
battles.head(3)

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
Name: State, dtype: object

In [32]:
battles.head(3)

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
Name: State, dtype: object

In [33]:
sorted_battles = battles.sort_values()

In [34]:
battles

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 232, dtype: object

In [35]:
sorted_battles

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
1777-09-03       Delaware
1777-05-17        Florida
                 ...     
1782-08-08            NaN
1782-08-25            NaN
1782-09-13            NaN
1782-10-18            NaN
1782-12-06            NaN
Name: State, Length: 232, dtype: object

## 3.4 Counting values with the value_counts method

In [36]:
pokemon.head()

Pokemon
Bulbasaur     Grass / Poison
Ivysaur       Grass / Poison
Venusaur      Grass / Poison
Charmander              Fire
Charmeleon              Fire
Name: Type, dtype: object

In [37]:
pokemon.value_counts(ascending=True)

Type
Fire / Ghost         1
Fighting / Dark      1
Fighting / Steel     1
Normal / Ground      1
Fire / Psychic       1
                    ..
Fire                30
Psychic             35
Grass               38
Water               61
Normal              65
Name: count, Length: 159, dtype: int64

In [38]:
len(pokemon.value_counts())

159

In [39]:
pokemon.nunique()

159

In [40]:
pokemon.value_counts()

Type
Normal                65
Water                 61
Grass                 38
Psychic               35
Fire                  30
                      ..
Fire / Psychic         1
Normal / Ground        1
Psychic / Fighting     1
Dark / Ghost           1
Fire / Ghost           1
Name: count, Length: 159, dtype: int64

In [41]:
pokemon.value_counts(normalize=True).head()

Type
Normal     0.080346
Water      0.075402
Grass      0.046972
Psychic    0.043263
Fire       0.037083
Name: proportion, dtype: float64

In [42]:
pokemon.value_counts(normalize=True).head()  * 100

Type
Normal     8.034611
Water      7.540173
Grass      4.697157
Psychic    4.326329
Fire       3.708282
Name: proportion, dtype: float64

In [43]:
(pokemon.value_counts(normalize=True) * 100).round(2)

Type
Normal                8.03
Water                 7.54
Grass                 4.70
Psychic               4.33
Fire                  3.71
                      ... 
Fire / Psychic        0.12
Normal / Ground       0.12
Psychic / Fighting    0.12
Dark / Ghost          0.12
Fire / Ghost          0.12
Name: proportion, Length: 159, dtype: float64

In [44]:
battles

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 232, dtype: object

In [45]:
google.value_counts().head()

Close
287.68    3
194.27    3
307.10    3
288.92    3
290.41    3
Name: count, dtype: int64

In [46]:
google.max()

1287.58

In [47]:
google.min()

49.82

In [48]:
# Bin edges every 200 from 0 through 1400, wide enough to cover the observed
# minimum and maximum closing prices.
buckets = list(range(0, 1401, 200))
google.value_counts(bins=buckets)

Close
(200.0, 400.0]      1568
(-0.001, 200.0]      595
(400.0, 600.0]       575
(1000.0, 1200.0]     406
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1200.0, 1400.0]      93
Name: count, dtype: int64

In [49]:
google.value_counts(bins=buckets).sort_index()

Close
(-0.001, 200.0]      595
(200.0, 400.0]      1568
(400.0, 600.0]       575
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1000.0, 1200.0]     406
(1200.0, 1400.0]      93
Name: count, dtype: int64

In [50]:
# Same result as calling sort_index() on the binned counts: sort=False keeps
# the bins in interval order instead of ordering them by count.
google.value_counts(bins=buckets, sort=False)

Close
(-0.001, 200.0]      595
(200.0, 400.0]      1568
(400.0, 600.0]       575
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1000.0, 1200.0]     406
(1200.0, 1400.0]      93
Name: count, dtype: int64

In [51]:
google.value_counts(bins=6, sort=False)

Close
(48.581, 256.113]      1204
(256.113, 462.407]     1104
(462.407, 668.7]        507
(668.7, 874.993]        380
(874.993, 1081.287]     292
(1081.287, 1287.58]     337
Name: count, dtype: int64

In [52]:
battles.head()

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
Name: State, dtype: object

In [53]:
battles.value_counts().head()

State
South Carolina    31
New York          28
New Jersey        24
Virginia          21
Massachusetts     11
Name: count, dtype: int64

In [54]:
battles.value_counts(dropna=False).head()

State
NaN               70
South Carolina    31
New York          28
New Jersey        24
Virginia          21
Name: count, dtype: int64

In [55]:
battles.index

DatetimeIndex(['1774-09-01', '1774-12-14', '1775-04-19', '1775-04-19',
               '1775-04-20', '1775-05-10', '1775-05-27', '1775-06-11',
               '1775-06-17', '1775-08-08',
               ...
               '1782-08-08', '1782-08-15', '1782-08-19', '1782-08-26',
               '1782-08-25', '1782-09-11', '1782-09-13', '1782-10-18',
               '1782-12-06', '1783-01-22'],
              dtype='datetime64[ns]', name='Start Date', length=232, freq=None)

In [56]:
battles.index.value_counts()

Start Date
1781-04-25    2
1781-05-22    2
1780-08-18    2
1781-09-13    2
1782-03-16    2
             ..
1778-06-30    1
1778-07-03    1
1778-07-27    1
1778-08-21    1
1783-01-22    1
Name: count, Length: 217, dtype: int64

## 3.5 Invoking a function on every Series value with the apply method

In [57]:
funcs = [len, max, min]

In [58]:
# Call each built-in on the Series and print one result per line.
print(*(current_func(google) for current_func in funcs), sep="\n")

3824
1287.58
49.82


In [59]:
round(99.2)

99

In [60]:
round(99.49)

99

In [61]:
round(99.5)

100

In [62]:
# The two calls below are equivalent: the function may be passed by keyword
# (func) or positionally. apply invokes it once per Series value.
google.apply(func=round)
google.apply(round)

Date
2004-08-19      50
2004-08-20      54
2004-08-23      54
2004-08-24      52
2004-08-25      53
              ... 
2019-10-21    1246
2019-10-22    1243
2019-10-23    1259
2019-10-24    1261
2019-10-25    1265
Name: Close, Length: 3824, dtype: int64

In [63]:
def single_or_multi(pokemon_type):
    """Classify a type string as "Multi" if it contains a slash, else "Single"."""
    return "Multi" if "/" in pokemon_type else "Single"

In [64]:
pokemon.head(4)

Pokemon
Bulbasaur     Grass / Poison
Ivysaur       Grass / Poison
Venusaur      Grass / Poison
Charmander              Fire
Name: Type, dtype: object

In [65]:
pokemon.apply(single_or_multi)

Pokemon
Bulbasaur       Multi
Ivysaur         Multi
Venusaur        Multi
Charmander     Single
Charmeleon     Single
                ...  
Stakataka       Multi
Blacephalon     Multi
Zeraora        Single
Meltan         Single
Melmetal       Single
Name: Type, Length: 809, dtype: object

In [66]:
pokemon.apply(single_or_multi).value_counts()

Type
Multi     405
Single    404
Name: count, dtype: int64

## 3.6 Coding challenge

### 3.6.1 Problems

The final output should be a Series with the days (Sunday, Monday, and so on) as index labels
and a count of battles on each day as the values.
```
Saturday 39
Friday 39
Wednesday 32
Thursday 31
Sunday 31
Tuesday 29
Monday 27
```

In [67]:
import datetime as dt

# "%A" is the strftime directive for the full weekday name.
today = dt.datetime(year=2020, month=12, day=26)
today.strftime("%A")

'Saturday'

### 3.6.2 Solutions

In [68]:
pd.read_csv("data/ch03/revolutionary_war.csv").head()

Unnamed: 0,Battle,Start Date,State
0,Powder Alarm,9/1/1774,Massachusetts
1,Storming of Fort William and Mary,12/14/1774,New Hampshire
2,Battles of Lexington and Concord,4/19/1775,Massachusetts
3,Siege of Boston,4/19/1775,Massachusetts
4,Gunpowder Incident,4/20/1775,Virginia


In [69]:
# Load only the battle start dates, parsed as datetimes; squeeze() turns the
# one-column DataFrame into a Series.
days_of_war = pd.read_csv(
    "data/ch03/revolutionary_war.csv",
    parse_dates=["Start Date"],
    usecols=["Start Date"],
).squeeze()
days_of_war.head()

0   1774-09-01
1   1774-12-14
2   1775-04-19
3   1775-04-19
4   1775-04-20
Name: Start Date, dtype: datetime64[ns]

In [70]:
def day_of_week(date):
    """Return the full weekday name (e.g. "Saturday") for a date-like object."""
    weekday_format = "%A"
    return date.strftime(weekday_format)

In [71]:
# days_of_war.apply(day_of_week)  # Raises ValueError: the Series still contains missing dates (NaT); drop them first.

In [72]:
days_of_war.dropna().apply(day_of_week)

0       Thursday
1      Wednesday
2      Wednesday
3      Wednesday
4       Thursday
         ...    
227    Wednesday
228       Friday
229       Friday
230       Friday
231    Wednesday
Name: Start Date, Length: 228, dtype: object

In [73]:
days_of_war.dropna().apply(day_of_week).value_counts()

Start Date
Saturday     39
Friday       39
Wednesday    32
Thursday     31
Sunday       31
Tuesday      29
Monday       27
Name: count, dtype: int64