# using pandas cut()

Source: https://dagster.io/glossary/data-discretization


In [1]:
import pandas as pd

# Sample dataset: the integers 1 through 10 in a single 'values' column.
data = pd.DataFrame({'values': list(range(1, 11))})

# Let cut() split the observed range into three equal-width intervals and
# name them from lowest to highest. The intervals are closed on the right,
# so 4 lands in 'Low' and 7 lands in 'Medium'.
level_names = ['Low', 'Medium', 'High']
data['bins'] = pd.cut(data['values'], bins=3, labels=level_names)

# Show every value next to the bin it was assigned to.
print(data)



   values    bins
0       1     Low
1       2     Low
2       3     Low
3       4     Low
4       5  Medium
5       6  Medium
6       7  Medium
7       8    High
8       9    High
9      10    High


# Another example using cut()

In [2]:
import pandas as pd

# create a sample dataset
data = {'ages': [21, 32, 45, 18, 52, 28, 38, 50]}
df = pd.DataFrame(data)

# Explicit bin edges, one label per interval. cut() closes each interval on
# the right by default, so an age equal to an edge falls into the lower
# group: 18 is labelled 'child' and 50 is labelled 'middle-aged'.
bins = [0, 18, 35, 50, float('inf')]
labels = ['child', 'young adult', 'middle-aged', 'senior']

# include_lowest=True closes the first interval on the left as well, so an
# age of exactly 0 is labelled 'child' instead of silently becoming NaN.
df['age_group'] = pd.cut(
    df['ages'],
    bins=bins,
    labels=labels,
    include_lowest=True,
)

print(df)


   ages    age_group
0    21  young adult
1    32  young adult
2    45  middle-aged
3    18        child
4    52       senior
5    28  young adult
6    38  middle-aged
7    50  middle-aged


# using scikit-learn

In [3]:
from sklearn.preprocessing import KBinsDiscretizer

# Sample dataset: ten single-feature rows holding the values 1 through 10.
data = [[value] for value in range(1, 11)]

# Three equal-width bins, with each row replaced by the ordinal index of the
# bin it falls into.
discretizer = KBinsDiscretizer(
    n_bins=3,
    encode='ordinal',
    strategy='uniform',
)
data_bins = discretizer.fit_transform(data)

# Show the bin index assigned to each row.
print(data_bins)


[[0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [2.]
 [2.]
 [2.]
 [2.]]
