In [2]:
import pandas as pd
import numpy as np
import datetime
from datetime import datetime, date
import matplotlib.pyplot as plt

In [3]:
# Five ratings in arbitrary order. No explicit categories are given, so
# pandas derives them from the distinct values.
lmh_values = [
    'low',
    'high',
    'medium',
    'medium',
    'high',
]
lmh_cat = pd.Categorical(values=lmh_values)
lmh_cat

[low, high, medium, medium, high]
Categories (3, object): [high, low, medium]

In [4]:
# Inferred category labels; the output lists them alphabetically (high, low, medium).
lmh_cat.categories

Index(['high', 'low', 'medium'], dtype='object')

In [5]:
# Get the values back as an object-dtype NumPy array, in their original order.
lmh_cat.to_numpy()

array(['low', 'high', 'medium', 'medium', 'high'], dtype=object)

In [6]:
# Integer code per value = position of its label in `categories`
# (here high -> 0, low -> 1, medium -> 2).
lmh_cat.codes

array([1, 0, 2, 2, 0], dtype=int8)

In [7]:
# Rebuild the categorical with an explicit category order (low, medium, high)
# in place of the inferred alphabetical one.
lmh_cat = pd.Categorical(
    values=lmh_values,
    categories=['low', 'medium', 'high'],
)
lmh_cat

[low, high, medium, medium, high]
Categories (3, object): [low, medium, high]

In [8]:
# With the explicit category order the codes are low -> 0, medium -> 1, high -> 2.
lmh_cat.codes

array([0, 2, 1, 1, 2], dtype=int8)

In [9]:
# Sorting follows the category order (low, medium, high), not alphabetical order.
lmh_cat.sort_values()

[low, medium, medium, high, high]
Categories (3, object): [low, medium, high]

In [10]:
# Create a Series that is categorical from the start by requesting the
# 'category' dtype at construction time.
cat_series = pd.Series(
    data=lmh_values,
    dtype='category',
)
cat_series

0       low
1      high
2    medium
3    medium
4      high
dtype: category
Categories (3, object): [high, low, medium]

In [11]:
# Alternative construction: build a plain Series first, then convert it to
# the category dtype with astype.
s = pd.Series(lmh_values)
as_cat = s.astype('category')
# Display the Series converted in this cell (not the earlier `cat_series`),
# so the result of astype is what the reader actually sees.
as_cat

0       low
1      high
2    medium
3    medium
4      high
dtype: category
Categories (3, object): [high, low, medium]

In [12]:
# A category-dtype Series exposes a `.cat` accessor (a CategoricalAccessor object).
cat_series.cat

<pandas.core.arrays.categorical.CategoricalAccessor object at 0x000001129A9D39C8>

In [13]:
# Category labels of the Series, reached through the `.cat` accessor.
cat_series.cat.categories

Index(['high', 'low', 'medium'], dtype='object')

In [14]:
# Seed NumPy's global RNG so the five sampled integers are reproducible,
# then wrap them in a one-column DataFrame.
np.random.seed(123456)
values = np.random.randint(low=0, high=100, size=5)
bins = pd.DataFrame(data={'values': values})
bins

Unnamed: 0,values
0,65
1,49
2,56
3,43
4,43


In [15]:
# Assign every value to one of the width-10 intervals whose edges run
# 0, 10, ..., 100, and store the interval next to the value.
bins['Group'] = pd.cut(x=values, bins=range(0, 101, 10))
bins

Unnamed: 0,values,Group
0,65,"(60, 70]"
1,49,"(40, 50]"
2,56,"(50, 60]"
3,43,"(40, 50]"
4,43,"(40, 50]"


In [16]:
# The Group column is an ordered categorical made of 10 right-closed intervals.
bins.Group

0    (60, 70]
1    (40, 50]
2    (50, 60]
3    (40, 50]
4    (40, 50]
Name: Group, dtype: category
Categories (10, interval[int64]): [(0, 10] < (10, 20] < (20, 30] < (30, 40] ... (60, 70] < (70, 80] < (80, 90] < (90, 100]]

In [17]:
# An ordered categorical: the category list defines the rank
# bronze < silver < gold.
metal_values = ['bronze', 'gold', 'silver', 'bronze']
metal_categories = ['bronze', 'silver', 'gold']
metals = pd.Categorical(
    values=metal_values,
    categories=metal_categories,
    ordered=True,
)
metals

[bronze, gold, silver, bronze]
Categories (3, object): [bronze < silver < gold]

In [18]:
# Same values in reverse order, with the same ordered categories as `metals`,
# so the two categoricals can be compared element-wise.
metal_reversed_values = pd.Categorical(
    values=metals.to_numpy()[::-1],
    categories=metals.categories,
    ordered=True,
)
metal_reversed_values

[bronze, silver, gold, bronze]
Categories (3, object): [bronze < silver < gold]

In [19]:
# Element-wise comparison of two ordered categoricals; the result follows the
# category rank (bronze < silver < gold), e.g. gold <= silver is False.
metals <= metal_reversed_values

array([ True, False,  True,  True])

In [20]:
# Codes behind `metals` (bronze -> 0, silver -> 1, gold -> 2).
metals.codes

array([0, 2, 1, 0], dtype=int8)

In [21]:
# Codes behind the reversed categorical, using the same label-to-code mapping.
metal_reversed_values.codes

array([0, 1, 2, 0], dtype=int8)

In [22]:
# A value that is not among the given categories ('copper') becomes NaN.
pd.Categorical(['bronze', 'copper'], categories=metal_categories)

[bronze, NaN]
Categories (3, object): [bronze, silver, gold]

In [23]:
# Small unordered categorical used below to demonstrate renaming categories.
cat = pd.Categorical(
    values=['a', 'b', 'c', 'a'],
    categories=['a', 'b', 'c'],
)
cat

[a, b, c, a]
Categories (3, object): [a, b, c]

In [24]:
# Relabel the categories positionally (a -> bronze, b -> silver, c -> gold).
# Assigning to `cat.categories` is not supported by current pandas, so use
# rename_categories and rebind the name to the returned categorical.
cat = cat.rename_categories(['bronze', 'silver', 'gold'])
cat

[bronze, silver, gold, bronze]
Categories (3, object): [bronze, silver, gold]

In [25]:
# rename_categories returns a relabelled categorical; the result is only
# displayed here, not assigned back to `cat`.
cat.rename_categories(['x', 'y', 'z'])

[x, y, z, x]
Categories (3, object): [x, y, z]

In [26]:
# `cat` still shows bronze/silver/gold: the x/y/z rename was never assigned back.
cat

[bronze, silver, gold, bronze]
Categories (3, object): [bronze, silver, gold]

In [27]:
# Append 'platinum' as a new category; the output shows it ranked after gold
# and no existing value using it.
with_platinum = metals.add_categories(new_categories=['platinum'])
with_platinum

[bronze, gold, silver, bronze]
Categories (4, object): [bronze < silver < gold < platinum]

In [28]:
# Remove the 'bronze' category; values that were bronze show up as NaN.
no_bronze = metals.remove_categories(removals=['bronze'])
no_bronze

[NaN, gold, silver, NaN]
Categories (2, object): [silver < gold]

In [29]:
# Drop categories that no value uses; 'platinum' is gone from the displayed result.
with_platinum.remove_unused_categories()

[bronze, gold, silver, bronze]
Categories (3, object): [bronze < silver < gold]

In [30]:
# Categorical Series with four distinct labels; the categories are inferred
# from the data.
s = pd.Series(
    data=['one', 'two', 'four', 'five'],
    dtype='category',
)
s

0     one
1     two
2    four
3    five
dtype: category
Categories (4, object): [five, four, one, two]

In [31]:
# Restrict the categories to 'one' and 'four'; values outside that set
# ('two', 'five') turn into NaN.
s = s.cat.set_categories(new_categories=['one', 'four'])
s

0     one
1     NaN
2    four
3     NaN
dtype: category
Categories (2, object): [one, four]

In [32]:
# Summary table with per-category counts and relative frequencies.
metals.describe()

Unnamed: 0_level_0,counts,freqs
categories,Unnamed: 1_level_1,Unnamed: 2_level_1
bronze,2,0.5
silver,1,0.25
gold,1,0.25


In [33]:
# Number of occurrences per category.
metals.value_counts()

bronze    2
silver    1
gold      1
dtype: int64

In [34]:
# min/max follow the category rank (bronze < silver < gold).
# Categorical.mode() is not available in current pandas, so compute the mode
# through a Series and take `.array` to get a Categorical back.
(metals.min(), metals.max(), pd.Series(metals).mode().array)

('bronze',
 'gold',
 [bronze]
 Categories (3, object): [bronze < silver < gold])

In [35]:
# Reproducible sample data: one random integer grade from 50 to 100
# (inclusive) per student.
np.random.seed(123456)
names = [
    'Ivana', 'Norris', 'Ruth', 'Lane', 'Skye',
    'Sol', 'Dylan', 'Katina', 'Alissa', 'Marc',
]
grades = np.random.randint(low=50, high=101, size=len(names))
scores = pd.DataFrame(data={'Name': names, 'Grade': grades})
scores

Unnamed: 0,Name,Grade
0,Ivana,51
1,Norris,92
2,Ruth,100
3,Lane,99
4,Skye,93
5,Sol,97
6,Dylan,93
7,Katina,77
8,Alissa,82
9,Marc,73


In [36]:
# Bin edges for the grading scale and the matching labels. There are 14 edges
# and 13 labels, so each pair of neighbouring edges gets one letter grade.
score_bins = [
    0, 59, 62, 66, 69, 72, 76,
    79, 82, 86, 89, 92, 99, 100,
]
letter_grades = [
    'F',
    'D-', 'D', 'D+',
    'C-', 'C', 'C+',
    'B-', 'B', 'B+',
    'A-', 'A', 'A+',
]

In [38]:
# Map numeric grades to letter grades. The bins are right-closed, so without
# include_lowest a grade of exactly 0 would fall outside the first bin
# (0, 59] and become NaN instead of 'F'.
letter_cats = pd.cut(
    scores.Grade,
    score_bins,
    labels=letter_grades,
    include_lowest=True,
)
# Store the letter next to each student's numeric grade.
scores['Letter'] = letter_cats
scores

Unnamed: 0,Name,Grade,Letter
0,Ivana,51,F
1,Norris,92,A-
2,Ruth,100,A+
3,Lane,99,A
4,Skye,93,A
5,Sol,97,A
6,Dylan,93,A
7,Katina,77,C+
8,Alissa,82,B-
9,Marc,73,C


In [39]:
# The cut result is an ordered categorical Series (F < D- < ... < A+) that
# keeps the name 'Grade' from the column it was computed from.
letter_cats

0     F
1    A-
2    A+
3     A
4     A
5     A
6     A
7    C+
8    B-
9     C
Name: Grade, dtype: category
Categories (13, object): [F < D- < D < D+ ... B+ < A- < A < A+]

In [40]:
# Students per letter grade; grades nobody received are listed with a count of 0.
scores.Letter.value_counts()

A     4
A+    1
A-    1
B-    1
C+    1
C     1
F     1
B+    0
B     0
C-    0
D+    0
D     0
D-    0
Name: Letter, dtype: int64

In [41]:
# Sort from best to worst letter grade; the order follows the category rank
# (A+ before A before A-), not alphabetical order.
scores.sort_values(by=['Letter'], ascending=False)

Unnamed: 0,Name,Grade,Letter
2,Ruth,100,A+
3,Lane,99,A
4,Skye,93,A
5,Sol,97,A
6,Dylan,93,A
1,Norris,92,A-
8,Alissa,82,B-
7,Katina,77,C+
9,Marc,73,C
0,Ivana,51,F
