In [1]:
import numpy as np
import pandas as pd

import datetime
from datetime import datetime, date

# Notebook-wide pandas display settings: plain-text (non-HTML) reprs, truncated
# to 8 columns / 10 rows and wrapped at 80 characters.
_DISPLAY_OPTIONS = {
    'display.notebook_repr_html': False,
    'display.max_columns': 8,
    'display.max_rows': 10,
    'display.width': 80,
}
for _option_name, _option_value in _DISPLAY_OPTIONS.items():
    pd.set_option(_option_name, _option_value)

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Raw low/medium/high observations. No category list is supplied, so pandas
# derives the categories from the distinct values.
lmh_values = [
    'low',
    'high',
    'medium',
    'medium',
    'high',
]
lmh_cat = pd.Categorical(values=lmh_values)
lmh_cat

[low, high, medium, medium, high]
Categories (3, object): [high, low, medium]

In [3]:
# The distinct labels backing the categorical, exposed as a pandas Index.
lmh_cat.categories

Index(['high', 'low', 'medium'], dtype='object')

In [4]:
# Materialise the categorical back into a plain NumPy array of its values.
# Categorical.get_values() was deprecated in pandas 0.25 and has since been
# removed; np.asarray is the documented replacement and produces the same
# object-dtype array.
np.asarray(lmh_cat)

array(['low', 'high', 'medium', 'medium', 'high'], dtype=object)

In [5]:
# Integer code stored for each element: the position of its label in .categories.
lmh_cat.codes

array([1, 0, 2, 2, 0], dtype=int8)

In [6]:
# Rebuild the categorical with an explicit category list so the categories are
# kept in low -> medium -> high order rather than the inferred one.
lmh_cat = pd.Categorical(
    values=lmh_values,
    categories=['low', 'medium', 'high'],
)
lmh_cat

[low, high, medium, medium, high]
Categories (3, object): [low, medium, high]

In [7]:
# Codes now index into the explicit category list (low=0, medium=1, high=2).
lmh_cat.codes

array([0, 2, 1, 1, 2], dtype=int8)

In [8]:
# Sorting follows the category order (low, medium, high), not alphabetical order.
lmh_cat.sort_values()

[low, medium, medium, high, high]
Categories (3, object): [low, medium, high]

In [9]:
# Create a categorical Series directly by requesting the 'category' dtype.
cat_series = pd.Series(
    data=lmh_values,
    dtype='category',
)
cat_series

0       low
1      high
2    medium
3    medium
4      high
dtype: category
Categories (3, object): [high, low, medium]

In [10]:
# Alternative route: start from an ordinary Series of the raw values and
# convert it to the 'category' dtype afterwards.
s = pd.Series(data=lmh_values)
as_cat = s.astype(dtype='category')
as_cat

0       low
1      high
2    medium
3    medium
4      high
dtype: category
Categories (3, object): [high, low, medium]

In [11]:
# .cat is the accessor object through which a categorical Series exposes its
# categorical-specific attributes.
as_cat.cat

<pandas.core.arrays.categorical.CategoricalAccessor object at 0x7f17a0681b70>

In [12]:
# Category labels of the Series, reached through the .cat accessor.
as_cat.cat.categories

Index(['high', 'low', 'medium'], dtype='object')

In [13]:
# Seed the legacy global RNG so the five sample integers are reproducible,
# then wrap them in a single-column DataFrame.
np.random.seed(123456)
values = np.random.randint(low=0, high=100, size=5)
bins = pd.DataFrame(data={'Values': values})
bins

   Values
0      65
1      49
2      56
3      43
4      43

In [14]:
# Bucket each value into a width-10 interval with edges 0, 10, ..., 100 and
# store the resulting categorical as a new 'Group' column.
# NOTE(review): the displayed intervals are right-closed, e.g. (0, 10], so a
# value of exactly 0 would presumably land in no bin and become NaN; the seeded
# sample does not contain one.
bins['Group'] = pd.cut(x=values, bins=range(0, 101, 10))
bins

   Values     Group
0      65  (60, 70]
1      49  (40, 50]
2      56  (50, 60]
3      43  (40, 50]
4      43  (40, 50]

In [15]:
# The Group column is a categorical Series whose categories are the ten
# ordered intervals produced by pd.cut.
bins.Group

0    (60, 70]
1    (40, 50]
2    (50, 60]
3    (40, 50]
4    (40, 50]
Name: Group, dtype: category
Categories (10, interval[int64]): [(0, 10] < (10, 20] < (20, 30] < (30, 40] ... (60, 70] < (70, 80] < (80, 90] < (90, 100]]

In [16]:
# Ordered categorical: the category list fixes the ranking
# bronze < silver < gold, independent of the order of the observations.
metal_values = ['bronze', 'gold', 'silver', 'bronze']
metal_categories = ['bronze', 'silver', 'gold']
metals = pd.Categorical(
    values=metal_values,
    categories=metal_categories,
    ordered=True,
)
metals

[bronze, gold, silver, bronze]
Categories (3, object): [bronze < silver < gold]

In [17]:
# Codes are positions in the declared category list: bronze=0, silver=1, gold=2.
metals.codes

array([0, 2, 1, 0], dtype=int8)

In [19]:
# Build a second ordered categorical holding the same metals in reverse order.
# It reuses the original's category list so both share the same ranking.
# np.asarray replaces Categorical.get_values(), which was deprecated in
# pandas 0.25 and has since been removed.
metals_reversed_values = pd.Categorical(
    np.asarray(metals)[::-1],
    categories=metals.categories,
    ordered=True,
)
metals_reversed_values

[bronze, silver, gold, bronze]
Categories (3, object): [bronze < silver < gold]

In [20]:
# Redisplay the original categorical; metals_reversed_values was built from
# its values, not by modifying it.
metals

[bronze, gold, silver, bronze]
Categories (3, object): [bronze < silver < gold]

In [21]:
# Element-wise comparison of two ordered categoricals; the result follows the
# category ranking (bronze < silver < gold), giving a boolean array.
metals <= metals_reversed_values

array([ True, False,  True,  True])

In [22]:
# Codes of the left-hand operand of the comparison above.
metals.codes

array([0, 2, 1, 0], dtype=int8)

In [23]:
# Codes of the reversed categorical, for comparison with metals.codes.
metals_reversed_values.codes

array([0, 1, 2, 0], dtype=int8)

In [24]:
# A value that is not in the supplied category list ('copper') becomes NaN.
pd.Categorical(['bronze', 'copper'], categories=metal_categories)

[bronze, NaN]
Categories (3, object): [bronze, silver, gold]

In [25]:
# Small categorical with placeholder labels, used by the renaming examples.
cat = pd.Categorical(
    values=['a', 'b', 'c', 'a'],
    categories=['a', 'b', 'c'],
)
cat

[a, b, c, a]
Categories (3, object): [a, b, c]

In [26]:
# Relabel the categories positionally: a -> bronze, b -> silver, c -> gold.
# Assigning to `cat.categories` directly was deprecated in pandas 1.x and
# removed in 2.0. rename_categories returns a new Categorical, so rebind
# `cat` to keep the renamed labels for the cells that follow.
cat = cat.rename_categories(['bronze', 'silver', 'gold'])
cat

[bronze, silver, gold, bronze]
Categories (3, object): [bronze, silver, gold]

In [27]:
# Rename the categories positionally; the result is only displayed here and
# `cat` is not rebound.
cat.rename_categories(['x', 'y', 'z'])

[x, y, z, x]
Categories (3, object): [x, y, z]

In [28]:
# Extend the category list with 'platinum', appended after the existing
# categories. The data values themselves are unchanged.
with_platinum = metals.add_categories(new_categories=['platinum'])
with_platinum

[bronze, gold, silver, bronze]
Categories (4, object): [bronze < silver < gold < platinum]

In [29]:
# Drop 'bronze' from the category list; elements that held that value
# become NaN in the result.
no_bronze = metals.remove_categories(removals=['bronze'])
no_bronze

[NaN, gold, silver, NaN]
Categories (2, object): [silver < gold]

In [30]:
# Drop categories that no element uses ('platinum' here); the result is
# displayed, not assigned.
with_platinum.remove_unused_categories()

[bronze, gold, silver, bronze]
Categories (3, object): [bronze < silver < gold]

In [31]:
# Categorical Series of number words; `s` is rebound here and replaces the
# earlier Series of the same name.
s = pd.Series(
    data=['one', 'two', 'four', 'five'],
    dtype='category',
)
s

0     one
1     two
2    four
3    five
dtype: category
Categories (4, object): [five, four, one, two]

In [32]:
# Categorical accessor of the Series `s`.
s.cat

<pandas.core.arrays.categorical.CategoricalAccessor object at 0x7f1778513710>