# 8 More Useful Pandas Functionalities For Your Analysis

## Setup

In [2]:
import pandas as pd
import numpy as np

## 1. `nlargest` / `nsmallest`

In [2]:
# Sample values for the demo frame; note that 7 occurs twice.
numbers = [1, 2, 3, 4, 5, 6, 7, 7, 8, 9]
# Column "b" carries the same values in reverse order.
df = pd.DataFrame({"a": numbers, "b": list(reversed(numbers))})
df

Unnamed: 0,a,b
0,1,9
1,2,8
2,3,7
3,4,7
4,5,6
5,6,5
6,7,4
7,7,3
8,8,2
9,9,1


In [3]:
# Three rows with the largest "a"; of the two tied 7s only the first one is kept.
df.nlargest(n=3, columns="a")

Unnamed: 0,a,b
9,9,1
8,8,2
6,7,4


In [4]:
# Same ranking, but the tie between the two 7s is resolved in favour of the last one.
df.nlargest(n=3, columns="a", keep="last")

Unnamed: 0,a,b
9,9,1
8,8,2
7,7,3


In [5]:
# keep="all" returns every tied row, so the result may hold more than three rows.
df.nlargest(n=3, columns="a", keep="all")

Unnamed: 0,a,b
9,9,1
8,8,2
6,7,4
7,7,3


In [6]:
# Rank by column "a" first and use "b" as the second ordering column.
df.nlargest(n=3, columns=["a", "b"])

Unnamed: 0,a,b
9,9,1
8,8,2
6,7,4


## 2. `hide_index` / `hide_columns`

In [None]:
# Rebuild the demo frame: "a" ascending, "b" the same values reversed.
numbers = [1, 2, 3, 4, 5, 6, 7, 7, 8, 9]
df = pd.DataFrame({"a": numbers, "b": numbers[::-1]})
df

Unnamed: 0,a,b
0,1,9
1,2,8
2,3,7
3,4,7
4,5,6
5,6,5
6,7,4
7,7,3
8,8,2
9,9,1


In [24]:
# Render the rows where "a" > 5 without showing the index column.
# NOTE(review): Styler.hide_index appears to be deprecated in newer pandas in
# favour of Styler.hide(axis="index") — confirm against the installed version.
df[df["a"] > 5].style.hide_index()

a,b
6,5
7,4
7,3
8,2
9,1


In [25]:
# Render the rows where "a" > 5 with both the index and column "b" hidden.
# NOTE(review): Styler.hide_index / Styler.hide_columns appear to be deprecated
# in newer pandas in favour of Styler.hide(axis=...) — confirm against the
# installed version.
df[df["a"] > 5].style.hide_index().hide_columns("b")

a
6
7
7
8
9


## 3. `query`

In [27]:
# Select rows with a string expression instead of a boolean mask.
df.query(expr="a > 5")

Unnamed: 0,a,b
5,6,5
6,7,4
7,7,3
8,8,2
9,9,1


In [30]:
# Several conditions can be combined inside a single query expression.
df.query(expr="a > 5 and b < 3")

Unnamed: 0,a,b
8,8,2
9,9,1


In [39]:
# The same selection written as an explicit boolean mask, for comparison with query.
df.loc[df["a"].gt(5) & df["b"].lt(3)]

Unnamed: 0,a,b
8,8,2
9,9,1


## 4. `first_valid_index` / `last_valid_index`

In [18]:
# Frame with missing values at the edges of each column.
# np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
df = pd.DataFrame(data={"a": [np.nan, 2, 3, 4, np.nan],
                        "b": [np.nan, np.nan, np.nan, 4, 5]})
df

Unnamed: 0,a,b
0,,
1,2.0,
2,3.0,
3,4.0,4.0
4,,5.0


In [19]:
# Index label of the first row holding a non-missing value (row 1 here, since
# row 0 is missing in both columns).
df.first_valid_index()

1

In [20]:
# Evaluate first_valid_index separately for every column.
df.apply(lambda column: column.first_valid_index(), axis=0)

a    1
b    3
dtype: int64

In [14]:
# Frame in which every value is missing.
# np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
df = pd.DataFrame(data={"a": [np.nan, np.nan],
                        "b": [np.nan, np.nan]})
df

Unnamed: 0,a,b
0,,
1,,


In [16]:
# Every value is missing, so there is no valid index to report; the call
# returns None, which is why this cell shows no output.
df.first_valid_index()

In [21]:
# Per-column lookup on the all-missing frame.
df.apply(lambda column: column.first_valid_index(), axis=0)

a    None
b    None
dtype: object

## 5. `is_monotonic` / `is_monotonic_decreasing`

In [11]:
# The repeated 4 matches the seven-row output shown for this cell and makes the
# column non-decreasing rather than strictly increasing.
df = pd.DataFrame(data={"a": [1, 2, 3, 4, 4, 5, 6]})
df

Unnamed: 0,a
0,1
1,2
2,3
3,4
4,4
5,5
6,6


In [12]:
# True when the values never decrease (equal neighbours are allowed).
# is_monotonic_increasing is used because the shorter is_monotonic alias was
# removed in pandas 2.0; both report the same property.
df["a"].is_monotonic_increasing

True

In [9]:
# Counterpart check for the opposite direction; the displayed result for this
# column is False.
df["a"].is_monotonic_decreasing

False

In [10]:
# The same check works on an index, here a daily date range for January 2000.
# is_monotonic_increasing replaces the is_monotonic alias removed in pandas 2.0.
pd.date_range("2000-01-01", "2000-01-31").is_monotonic_increasing

True

## 6. `replace`

In [21]:
# Small integer frame used to demonstrate value replacement.
df = pd.DataFrame({"a": list(range(1, 6)), "b": [2, 2, 5, 5, 2]})
df

Unnamed: 0,a,b
0,1,2
1,2,2
2,3,5
3,4,5
4,5,2


In [22]:
# Swap every 2 in the frame, whatever its column, for 999.
df.replace(to_replace=2, value=999)

Unnamed: 0,a,b
0,1,999
1,999,999
2,3,5
3,4,5
4,5,999


In [23]:
# A nested mapping limits the replacement to column "a"; "b" stays untouched.
df.replace(
    to_replace={
        "a": {1: "one", 2: "two", 3: "three", 4: "four", 5: "five"},
    }
)

Unnamed: 0,a,b
0,one,2
1,two,2
2,three,5
3,four,5
4,five,2


## 7. `mask` / `where`

In [24]:
# Small integer frame used to demonstrate mask and where.
df = pd.DataFrame(dict(a=[1, 2, 3, 4, 5], b=[2, 2, 5, 5, 2]))
df

Unnamed: 0,a,b
0,1,2
1,2,2
2,3,5
3,4,5
4,5,2


In [27]:
df["a"].mask(df["a"] < 4, "Less than 4")

0    Less than 4
1    Less than 4
2    Less than 4
3              4
4              5
Name: a, dtype: object

In [31]:
# Keep the values of "a" that are below 4 and label every other value.
df["a"].where(cond=df["a"] < 4, other="More than 3")

0              1
1              2
2              3
3    More than 3
4    More than 3
Name: a, dtype: object

## 8. `clip`

In [32]:
# Frame with values on both sides of the clipping bounds used in the next cells.
df = pd.DataFrame({"a": [1, 2, 3, 4, 9], "b": [8, 8, 9, 4, 1]})
df

Unnamed: 0,a,b
0,1,8
1,2,8
2,3,9
3,4,4
4,9,1


In [33]:
# Limit every value to the range 4..7: smaller values become 4, larger ones 7.
df.clip(lower=4, upper=7)

Unnamed: 0,a,b
0,4,7
1,4,7
2,4,7
3,4,4
4,7,4


In [34]:
# Only cap the values at 7; no lower bound is applied.
df.clip(lower=None, upper=7)

Unnamed: 0,a,b
0,1,7
1,2,7
2,3,7
3,4,4
4,7,1
