In [2]:
import pandas as pd
import numpy as np

## 'Binning' data with `pd.cut`

In [18]:
# Toy dataset: one row per hour from 8 through 18 with a random pickup count.
# A seeded Generator makes the random counts identical on every
# Restart Kernel -> Run All, so the displayed table is reproducible.
SEED = 42
rng = np.random.default_rng(SEED)

df = pd.DataFrame({
    'hour': list(range(8, 19)),
    # integers() excludes the upper bound, so counts fall in 0..9
    # (the same range the legacy randint(0, 10) call produced).
    'pickups': rng.integers(0, 10, size=11),
})
df

Unnamed: 0,hour,pickups
0,8,3
1,9,0
2,10,6
3,11,3
4,12,9
5,13,2
6,14,1
7,15,8
8,16,9
9,17,5


In [23]:
# Bucket each hour into a two-hour window. right=False closes every bin on
# the left and opens it on the right, i.e. [start, end).
df['timeslot'] = pd.cut(
    x=df['hour'],
    right=False,
    bins=[8, 10, 12, 14, 16, 18, 20],
)
df

Unnamed: 0,hour,pickups,timeslot
0,8,3,"[8, 10)"
1,9,0,"[8, 10)"
2,10,6,"[10, 12)"
3,11,3,"[10, 12)"
4,12,9,"[12, 14)"
5,13,2,"[12, 14)"
6,14,1,"[14, 16)"
7,15,8,"[14, 16)"
8,16,9,"[16, 18)"
9,17,5,"[16, 18)"


`pd.cut` creates a Categorical column. In the interval notation, a square bracket means that boundary is included in the bin and a parenthesis means it is excluded. By default each bin excludes its left-hand boundary and includes its right-hand boundary; set `right=False` to reverse that.

In [24]:
# Inspect the dtype of the binned column.
df.dtypes['timeslot']

CategoricalDtype(categories=[[8, 10), [10, 12), [12, 14), [14, 16), [16, 18), [18, 20)], ordered=True)

You can also define custom labels for each bin...

In [20]:
# Re-bin the hours, this time naming each bin. Labels are matched to the
# intervals in order, so there is one label per pair of adjacent edges
# (7 edges -> 6 labels); the last label, '6+', names the [18, 20) bin.
df['timeslot'] = pd.cut(
    x=df['hour'],
    labels=['8-10', '10-12', '12-2', '2-4', '4-6', '6+'],
    right=False,
    bins=[8, 10, 12, 14, 16, 18, 20],
)
df

Unnamed: 0,hour,pickups,timeslot
0,8,3,8-10
1,9,0,8-10
2,10,6,10-12
3,11,3,10-12
4,12,9,12-2
5,13,2,12-2
6,14,1,2-4
7,15,8,2-4
8,16,9,4-6
9,17,5,4-6
