In [1]:
import pandas as pd
import numpy as np


In [4]:
pd.read_csv(filepath_or_buffer="pokemon.csv", index_col="Pokemon")

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Grass / Poison
Ivysaur,Grass / Poison
Venusaur,Grass / Poison
Charmander,Fire
Charmeleon,Fire
...,...
Stakataka,Rock / Steel
Blacephalon,Fire / Ghost
Zeraora,Electric
Meltan,Steel


## Caution

We’ve successfully set the Pokemon column as the index, but pandas still imports the data into a DataFrame by default. After all, a container capable of holding multiple columns of data can technically hold one column of data. To force pandas to return a Series, older versions of pandas let us pass `read_csv` another parameter called `squeeze` with an argument of `True`; the `squeeze` parameter coerced a one-column DataFrame into a Series. That parameter was deprecated in pandas 1.4 and removed in pandas 2.0, so we now call the `squeeze()` method on the DataFrame that `read_csv` returns instead.

In [10]:
# Load the one-column CSV and collapse it into a Series indexed by Pokemon name.
# squeeze("columns") squeezes only the column axis, so the result is always a
# Series; a bare squeeze() would return a scalar if the file had a single row.
pokemon = pd.read_csv(filepath_or_buffer="pokemon.csv", index_col="Pokemon").squeeze("columns")

In [11]:
pokemon

Pokemon
Bulbasaur      Grass / Poison
Ivysaur        Grass / Poison
Venusaur       Grass / Poison
Charmander               Fire
Charmeleon               Fire
                    ...      
Stakataka        Rock / Steel
Blacephalon      Fire / Ghost
Zeraora              Electric
Meltan                  Steel
Melmetal                Steel
Name: Type, Length: 809, dtype: object

In [37]:
pokemon.sort_index(ascending=True)

Pokemon
Abomasnow        Grass / Ice
Abra                 Psychic
Absol                   Dark
Accelgor                 Bug
Aegislash      Steel / Ghost
                  ...       
Zoroark                 Dark
Zorua                   Dark
Zubat        Poison / Flying
Zweilous       Dark / Dragon
Zygarde      Dragon / Ground
Name: Type, Length: 809, dtype: object

In [48]:
pokemon.value_counts(ascending=False)

Type
Normal              65
Water               61
Grass               38
Psychic             35
Fire                30
                    ..
Normal / Dragon      1
Psychic / Steel      1
Rock / Poison        1
Fighting / Ghost     1
Fire / Ghost         1
Name: count, Length: 159, dtype: int64

In [47]:
# value_counts() has one row per distinct value, so its size should equal nunique().
pokemon.value_counts().size == pokemon.nunique()

True

In [52]:
# normalize=True makes value_counts() return each unique value's share of the
# total (a proportion) instead of a raw count; scale it to a percentage and
# keep three decimal places.

pokemon.value_counts(normalize=True).mul(100).round(decimals=3)

Type
Normal              8.035
Water               7.540
Grass               4.697
Psychic             4.326
Fire                3.708
                    ...  
Normal / Dragon     0.124
Psychic / Steel     0.124
Rock / Poison       0.124
Fighting / Ghost    0.124
Fire / Ghost        0.124
Name: proportion, Length: 159, dtype: float64

In [74]:
pokemon.head()

Pokemon
Bulbasaur     Grass / Poison
Ivysaur       Grass / Poison
Venusaur      Grass / Poison
Charmander              Fire
Charmeleon              Fire
Name: Type, dtype: object

In [76]:
def single_or_multi(pokemon_type):
    """Classify a type string as "Multi" or "Single".

    A value containing a "/" separator (e.g. "Grass / Poison") is reported
    as "Multi"; any other value is reported as "Single".
    """
    if "/" in pokemon_type:
        return "Multi"
    return "Single"

pokemon.apply(func=single_or_multi).value_counts(ascending=False)

Type
Multi     405
Single    404
Name: count, dtype: int64

In [12]:
# Preview the raw file with default parsing (Date is not yet parsed as a datetime).
pd.read_csv("google_stocks.csv").head()

Unnamed: 0,Date,Close
0,2004-08-19,49.98
1,2004-08-20,53.95
2,2004-08-23,54.5
3,2004-08-24,52.24
4,2004-08-25,52.8


In [13]:
pd.read_csv("google_stocks.csv", parse_dates=["Date"]).head()

Unnamed: 0,Date,Close
0,2004-08-19,49.98
1,2004-08-20,53.95
2,2004-08-23,54.5
3,2004-08-24,52.24
4,2004-08-25,52.8


In [18]:
# Load closing prices as a Series indexed by parsed dates.
# squeeze("columns") squeezes only the column axis, so the result is always a
# Series; a bare squeeze() would return a scalar if the file had a single row.
google = pd.read_csv(
    "google_stocks.csv",
    parse_dates=["Date"],
    index_col="Date",
).squeeze("columns")

In [19]:
google.head()

Date
2004-08-19    49.98
2004-08-20    53.95
2004-08-23    54.50
2004-08-24    52.24
2004-08-25    52.80
Name: Close, dtype: float64

In [26]:
google.sort_values(ascending=True)

Date
2004-09-03      49.82
2004-09-01      49.94
2004-08-19      49.98
2004-09-02      50.57
2004-09-07      50.60
               ...   
2019-04-23    1264.55
2019-10-25    1265.13
2018-07-26    1268.33
2019-04-26    1272.18
2019-04-29    1287.58
Name: Close, Length: 3824, dtype: float64

In [41]:
google.nlargest(n=5)

Date
2019-04-29    1287.58
2019-04-26    1272.18
2018-07-26    1268.33
2019-10-25    1265.13
2019-04-23    1264.55
Name: Close, dtype: float64

In [53]:
# Five lowest closing prices; n is spelled out to mirror the nlargest(n=5) call.
google.nsmallest(n=5)

Date
2004-09-03    49.82
2004-09-01    49.94
2004-08-19    49.98
2004-09-02    50.57
2004-09-07    50.60
Name: Close, dtype: float64

In [55]:
google.value_counts(ascending=False).head(10)

Close
288.92    3
307.10    3
194.27    3
290.41    3
287.68    3
237.04    3
401.59    2
281.10    2
294.14    2
282.14    2
Name: count, dtype: int64

In [56]:
google.max(), google.min()

(np.float64(1287.58), np.float64(49.82))

In [58]:
buckets = [0, 200, 400, 600, 800, 1000, 1200, 1400]

google.value_counts(bins=buckets, ascending=False)

(200.0, 400.0]      1568
(-0.001, 200.0]      595
(400.0, 600.0]       575
(1000.0, 1200.0]     406
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1200.0, 1400.0]      93
Name: count, dtype: int64

In [59]:
# Count prices per bucket, then order the rows by bucket interval rather than
# by count. The sort direction of value_counts() is irrelevant here because
# sort_index() re-orders the result anyway.
google.value_counts(bins=buckets).sort_index()

(-0.001, 200.0]      595
(200.0, 400.0]      1568
(400.0, 600.0]       575
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1000.0, 1200.0]     406
(1200.0, 1400.0]      93
Name: count, dtype: int64

In [60]:
# sort=False skips the sort-by-count step, leaving the buckets in interval order.
# `ascending` only applies when sort=True, so it is omitted here.
google.value_counts(bins=buckets, sort=False)

(-0.001, 200.0]      595
(200.0, 400.0]      1568
(400.0, 600.0]       575
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1000.0, 1200.0]     406
(1200.0, 1400.0]      93
Name: count, dtype: int64

In [61]:
# An integer `bins` asks pandas to split the value range into that many
# equal-width intervals. sort=False keeps them in interval order; `ascending`
# only applies when sort=True, so it is omitted here.
google.value_counts(bins=10, sort=False)

(48.581, 173.596]        409
(173.596, 297.372]      1298
(297.372, 421.148]       484
(421.148, 544.924]       345
(544.924, 668.7]         279
(668.7, 792.476]         287
(792.476, 916.252]       107
(916.252, 1040.028]      185
(1040.028, 1163.804]     255
(1163.804, 1287.58]      175
Name: count, dtype: int64

In [73]:
from functools import partial

# Round every closing price to one decimal place, one element at a time.
google.apply(func=lambda val: round(val, 1))
# Alternatives:
# google.apply(func=round)  # no ndigits argument, so each price is rounded to a whole number
# google.apply(func=partial(round, ndigits=1))  # equivalent to the lambda above
# NOTE: partial(round, 1) would not work here; it binds 1 as the number to round
# and passes each price as ndigits.

Date
2004-08-19      50.0
2004-08-20      54.0
2004-08-23      54.5
2004-08-24      52.2
2004-08-25      52.8
               ...  
2019-10-21    1246.2
2019-10-22    1242.8
2019-10-23    1259.1
2019-10-24    1261.0
2019-10-25    1265.1
Name: Close, Length: 3824, dtype: float64

In [21]:
pd.read_csv(
    "revolutionary_war.csv",
    index_col="Start Date",
    parse_dates=["Start Date"],
).tail()

Unnamed: 0_level_0,Battle,State
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1782-09-11,Siege of Fort Henry,Virginia
1782-09-13,Grand Assault on Gibraltar,
1782-10-18,Action of 18 October 1782,
1782-12-06,Action of 6 December 1782,
1783-01-22,Action of 22 January 1783,Virginia


In [23]:
# Keep only the State column, indexed by the parsed Start Date, as a Series.
# squeeze("columns") squeezes only the column axis, so the result is always a
# Series; a bare squeeze() would return a scalar if the file had a single row.
battls = pd.read_csv(
    "revolutionary_war.csv",
    index_col="Start Date",
    parse_dates=["Start Date"],
    usecols=["State", "Start Date"],
).squeeze("columns")

In [24]:
battls.tail()

Start Date
1782-09-11    Virginia
1782-09-13         NaN
1782-10-18         NaN
1782-12-06         NaN
1783-01-22    Virginia
Name: State, dtype: object

In [31]:
# By default, pandas places missing values NaN at the end of a sorted Series.
# Or you can set na_position="first"

battls.sort_values(ascending=True, na_position="last")

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
1777-09-03       Delaware
1777-05-17        Florida
                 ...     
1782-08-08            NaN
1782-08-25            NaN
1782-09-13            NaN
1782-10-18            NaN
1782-12-06            NaN
Name: State, Length: 232, dtype: object

In [36]:
# The dropna method returns a Series with all missing values removed.

battls.dropna().sort_values(ascending=True)

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
1777-09-03       Delaware
1777-05-17        Florida
                 ...     
1781-07-06       Virginia
1781-07-01       Virginia
1781-06-26       Virginia
1781-04-25       Virginia
1783-01-22       Virginia
Name: State, Length: 162, dtype: object

In [38]:
battls.sort_index(ascending=True)

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1783-01-22         Virginia
NaT              New Jersey
NaT                Virginia
NaT                     NaN
NaT                     NaN
Name: State, Length: 232, dtype: object

In [39]:
battls.sort_index(ascending=True, na_position="first")

Start Date
NaT              New Jersey
NaT                Virginia
NaT                     NaN
NaT                     NaN
1774-09-01    Massachusetts
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 232, dtype: object

In [62]:
battls

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 232, dtype: object

In [65]:
battls.value_counts(ascending=False, dropna=False).head()

State
NaN               70
South Carolina    31
New York          28
New Jersey        24
Virginia          21
Name: count, dtype: int64

In [66]:
battls.index

DatetimeIndex(['1774-09-01', '1774-12-14', '1775-04-19', '1775-04-19',
               '1775-04-20', '1775-05-10', '1775-05-27', '1775-06-11',
               '1775-06-17', '1775-08-08',
               ...
               '1782-08-08', '1782-08-15', '1782-08-19', '1782-08-26',
               '1782-08-25', '1782-09-11', '1782-09-13', '1782-10-18',
               '1782-12-06', '1783-01-22'],
              dtype='datetime64[ns]', name='Start Date', length=232, freq=None)

In [67]:
battls.index.value_counts(ascending=False)

Start Date
1775-04-19    2
1778-09-07    2
1777-08-22    2
1781-04-25    2
1781-09-13    2
             ..
1782-09-11    1
1782-09-13    1
1782-10-18    1
1782-12-06    1
1783-01-22    1
Name: count, Length: 217, dtype: int64