# Scale

In [3]:
import pandas as pd

# Build a one-column frame of letter grades; the index holds the coarse
# rating each grade falls under. Naming the column at construction time
# avoids the default positional column (0) and a follow-up in-place rename.
df = pd.DataFrame(
    {'Grades': ['D', 'C-', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+']},
    index=['poor', 'ok', 'ok', 'good', 'good', 'good',
           'excellent', 'excellent', 'excellent'],
)
df

Unnamed: 0,Grades
poor,D
ok,C-
ok,C+
good,B-
good,B
good,B+
excellent,A-
excellent,A
excellent,A+


In [5]:
from pandas.api.types import CategoricalDtype

# Nominal scale: the values are only grouped into categories; no ordering
# is defined between them.
# Series.astype() converts the column to the requested dtype.
grade_levels = ['D', 'C-', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+']
nominal_dtype = CategoricalDtype(categories=grade_levels)
grades = df['Grades'].astype(nominal_dtype)
grades.head()

poor     D
ok      C-
ok      C+
good    B-
good     B
Name: Grades, dtype: category
Categories (9, object): ['D', 'C-', 'C+', 'B-', ..., 'B+', 'A-', 'A', 'A+']

In [6]:
# Ordered scale: same categories, but ranked in the order they are listed
# (lowest first), so the values can be compared with < and >.
ordinal_dtype = CategoricalDtype(
    categories=['D', 'C-', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+'],
    ordered=True,
)
grades = df['Grades'].astype(ordinal_dtype)
grades

poor          D
ok           C-
ok           C+
good         B-
good          B
good         B+
excellent    A-
excellent     A
excellent    A+
Name: Grades, dtype: category
Categories (9, object): ['D' < 'C-' < 'C+' < 'B-' ... 'B+' < 'A-' < 'A' < 'A+']

In [8]:
# Element-wise "strictly better than B"; this relies on `grades` being an
# ordered categorical, so the comparison follows the category ranking
# rather than alphabetical string order.
grades.gt('B')

poor         False
ok           False
ok           False
good         False
good         False
good          True
excellent     True
excellent     True
excellent     True
Name: Grades, dtype: bool

In [9]:
# Categorizing ratio-scaled (numeric) data: start from the raw values.
s = pd.Series(
    [168, 180, 174, 190, 185, 179, 181,
     170, 175, 169, 182, 177, 180, 171]
)
s.head()

0    168
1    180
2    174
3    190
4    185
dtype: int64

In [10]:
# Split the value range of `s` into 3 equal-width bins; each element is
# mapped to the interval it falls into.
pd.cut(s, bins=3)

0     (167.978, 175.333]
1     (175.333, 182.667]
2     (167.978, 175.333]
3       (182.667, 190.0]
4       (182.667, 190.0]
5     (175.333, 182.667]
6     (175.333, 182.667]
7     (167.978, 175.333]
8     (167.978, 175.333]
9     (167.978, 175.333]
10    (175.333, 182.667]
11    (175.333, 182.667]
12    (175.333, 182.667]
13    (167.978, 175.333]
dtype: category
Categories (3, interval[float64]): [(167.978, 175.333] < (175.333, 182.667] < (182.667, 190.0]]

In [11]:
# Same 3 equal-width bins, but each bin is shown by name instead of as an
# interval; labels are assigned from the lowest bin to the highest.
size_labels = ['Small', 'Medium', 'Large']
pd.cut(s, bins=3, labels=size_labels)

0      Small
1     Medium
2      Small
3      Large
4      Large
5     Medium
6     Medium
7      Small
8      Small
9      Small
10    Medium
11    Medium
12    Medium
13     Small
dtype: category
Categories (3, object): ['Small' < 'Medium' < 'Large']