In [1]:
import pandas as pd

In [2]:
# Create a sample DataFrame
data = {
    'Name': ['Jon Snow', 'Daenerys Targaryen', 'Tyrion Lannister', 'Arya Stark', 'Cersei Lannister'],
    'House': ['Stark', 'Targaryen', 'Lannister', 'Stark', 'Lannister'],
    'Status': ['Alive', 'Deceased', 'Alive', 'Alive', 'Deceased'],
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Female'],
    'Age': [35, 30, 40, 18, 45],
    'Total_Appearances': [10, 12, 15, 8, 6],
    'Battles_Fought': [3, 5, 2, 1, 4]
}
df = pd.DataFrame(data)

In [3]:
# Convert 'House' column to a Categorical variable
df['House'] = pd.Categorical(df['House'])

In [4]:
# Example 1: Displaying the categories in a Categorical variable
print("Example 1:")
print(df['House'].cat.categories)

Example 1:
Index(['Lannister', 'Stark', 'Targaryen'], dtype='object')


In [5]:
# Example 2: Displaying the codes for each category in a Categorical variable
print("\nExample 2:")
print(df['House'].cat.codes)


Example 2:
0    1
1    2
2    0
3    1
4    0
dtype: int8


In [6]:
# Example 3: Renaming categories in a Categorical variable
df['House'].cat.categories = ['The Starks', 'The Targaryens', 'The Lannisters']
print("\nExample 3:")
print(df)



Example 3:
                 Name           House    Status  Gender  Age  \
0            Jon Snow  The Targaryens     Alive    Male   35   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   
2    Tyrion Lannister      The Starks     Alive    Male   40   
3          Arya Stark  The Targaryens     Alive  Female   18   
4    Cersei Lannister      The Starks  Deceased  Female   45   

   Total_Appearances  Battles_Fought  
0                 10               3  
1                 12               5  
2                 15               2  
3                  8               1  
4                  6               4  


  df['House'].cat.categories = ['The Starks', 'The Targaryens', 'The Lannisters']


In [7]:
# Example 4: Sorting a DataFrame based on a Categorical variable
df.sort_values(by='House', inplace=True)
print("\nExample 4:")
print(df)



Example 4:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

   Total_Appearances  Battles_Fought  
2                 15               2  
4                  6               4  
0                 10               3  
3                  8               1  
1                 12               5  


In [8]:
# Example 5: Creating a new Categorical variable based on numeric bins
df['Age_Category'] = pd.cut(df['Age'], bins=[0, 20, 30, 40, 50], labels=['Child', 'Young Adult', 'Adult', 'Senior'])
print("\nExample 5:")
print(df)



Example 5:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

   Total_Appearances  Battles_Fought Age_Category  
2                 15               2        Adult  
4                  6               4       Senior  
0                 10               3        Adult  
3                  8               1        Child  
1                 12               5  Young Adult  


In [9]:
# Example 6: Using the Categorical variable in groupby and aggregation
house_group = df.groupby('House')
house_stats = house_group['Age'].agg(['mean', 'min', 'max'])
print("\nExample 6:")
print(house_stats)


Example 6:
                mean  min  max
House                         
The Starks      42.5   40   45
The Targaryens  26.5   18   35
The Lannisters  30.0   30   30


In [10]:
# Example 7: Creating dummy variables from a Categorical variable
dummies = pd.get_dummies(df['House'], prefix='House', drop_first=True)
df = pd.concat([df, dummies], axis=1)
print("\nExample 7:")
print(df)


Example 7:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

   Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
2                 15               2        Adult                     0   
4                  6               4       Senior                     0   
0                 10               3        Adult                     1   
3                  8               1        Child                     1   
1                 12               5  Young Adult                     0   

   House_The Lannisters  
2                     0  
4                     0  
0                     0  
3                     0  
1                     

In [11]:
# Example 8: Converting a column to a Categorical variable with specified categories
gender_categories = ['Male', 'Female', 'Other']
df['Gender'] = pd.Categorical(df['Gender'], categories=gender_categories, ordered=False)
print("\nExample 8:")
print(df)


Example 8:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

   Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
2                 15               2        Adult                     0   
4                  6               4       Senior                     0   
0                 10               3        Adult                     1   
3                  8               1        Child                     1   
1                 12               5  Young Adult                     0   

   House_The Lannisters  
2                     0  
4                     0  
0                     0  
3                     0  
1                     

In [12]:
# Example 9: Comparing Categorical variables for equality
df['Gender_Equal_To_Male'] = df['Gender'] == 'Male'
print("\nExample 9:")
print(df)


Example 9:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

   Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
2                 15               2        Adult                     0   
4                  6               4       Senior                     0   
0                 10               3        Adult                     1   
3                  8               1        Child                     1   
1                 12               5  Young Adult                     0   

   House_The Lannisters  Gender_Equal_To_Male  
2                     0                  True  
4                     0                 False  
0       

In [13]:
# Example 10: Counting occurrences of each category in a Categorical variable
house_counts = df['House'].value_counts()
print("\nExample 10:")
print(house_counts)


Example 10:
The Starks        2
The Targaryens    2
The Lannisters    1
Name: House, dtype: int64


In [14]:
# Example 11: Displaying descriptive statistics for a Categorical variable
house_description = df['House'].describe()
print("\nExample 11:")
print(house_description)



Example 11:
count              5
unique             3
top       The Starks
freq               2
Name: House, dtype: object


In [15]:
# Example 12: Changing the order of categories in a Categorical variable
df['House'] = df['House'].cat.reorder_categories(['The Lannisters', 'The Starks', 'The Targaryens'])
print("\nExample 12:")
print(df)


Example 12:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

   Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
2                 15               2        Adult                     0   
4                  6               4       Senior                     0   
0                 10               3        Adult                     1   
3                  8               1        Child                     1   
1                 12               5  Young Adult                     0   

   House_The Lannisters  Gender_Equal_To_Male  
2                     0                  True  
4                     0                 False  
0      

In [16]:
# Example 13: Replacing values in a Categorical variable
df['Gender'] = df['Gender'].replace({'Other': 'Unknown'})
print("\nExample 13:")
print(df)


Example 13:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

   Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
2                 15               2        Adult                     0   
4                  6               4       Senior                     0   
0                 10               3        Adult                     1   
3                  8               1        Child                     1   
1                 12               5  Young Adult                     0   

   House_The Lannisters  Gender_Equal_To_Male  
2                     0                  True  
4                     0                 False  
0      

In [17]:
# Example 14: Removing unused categories from a Categorical variable
df['Gender'].cat.remove_unused_categories(inplace=True)
print("\nExample 14:")
print(df)


Example 14:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

   Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
2                 15               2        Adult                     0   
4                  6               4       Senior                     0   
0                 10               3        Adult                     1   
3                  8               1        Child                     1   
1                 12               5  Young Adult                     0   

   House_The Lannisters  Gender_Equal_To_Male  
2                     0                  True  
4                     0                 False  
0      

  df['Gender'].cat.remove_unused_categories(inplace=True)


In [18]:
# Example 15: Changing the data type of a column to Categorical
df['Total_Appearances'] = df['Total_Appearances'].astype('category')
print("\nExample 15:")
print(df.dtypes)


Example 15:
Name                      object
House                   category
Status                    object
Gender                  category
Age                        int64
Total_Appearances       category
Battles_Fought             int64
Age_Category            category
House_The Targaryens       uint8
House_The Lannisters       uint8
Gender_Equal_To_Male        bool
dtype: object


In [19]:
# Example 16: Creating a Categorical variable with ordered categories
df['Age_Category'] = pd.Categorical(df['Age_Category'], categories=['Child', 'Young Adult', 'Adult', 'Senior'], ordered=True)
print("\nExample 16:")
print(df)


Example 16:
                 Name           House    Status  Gender  Age  \
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   

  Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
2                15               2        Adult                     0   
4                 6               4       Senior                     0   
0                10               3        Adult                     1   
3                 8               1        Child                     1   
1                12               5  Young Adult                     0   

   House_The Lannisters  Gender_Equal_To_Male  
2                     0                  True  
4                     0                 False  
0            

In [20]:
# Example 17: Using Categorical variable in logical conditions
print("\nExample 17:")
print(df[df['Age_Category'] > 'Young Adult'])


Example 17:
               Name           House    Status  Gender  Age Total_Appearances  \
2  Tyrion Lannister      The Starks     Alive    Male   40                15   
4  Cersei Lannister      The Starks  Deceased  Female   45                 6   
0          Jon Snow  The Targaryens     Alive    Male   35                10   

   Battles_Fought Age_Category  House_The Targaryens  House_The Lannisters  \
2               2        Adult                     0                     0   
4               4       Senior                     0                     0   
0               3        Adult                     1                     0   

   Gender_Equal_To_Male  
2                  True  
4                 False  
0                  True  


In [21]:
# Example 18: Creating a Categorical variable from a Series with custom labels
battles_categories = pd.Categorical(df['Battles_Fought'], labels=['Low', 'Medium', 'High'], ordered=True)
df['Battles_Category'] = battles_categories
print("\nExample 18:")
print(df)

# Example 19: Applying a function to elements of a Categorical variable
def battles_category_description(category):
    if category == 'Low':
        return 'Few Battles'
    elif category == 'Medium':
        return 'Moderate Battles'
    elif category == 'High':
        return 'Many Battles'

df['Battles_Category_Description'] = df['Battles_Category'].apply(battles_category_description)
print("\nExample 19:")
print(df)

TypeError: Categorical.__init__() got an unexpected keyword argument 'labels'

In [22]:
# Example 20: Changing the order of categories in a Categorical variable using sort_values()
df.sort_values(by='House', inplace=True)
print("\nExample 20:")
print(df)


Example 20:
                 Name           House    Status  Gender  Age  \
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   

  Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
1                12               5  Young Adult                     0   
2                15               2        Adult                     0   
4                 6               4       Senior                     0   
0                10               3        Adult                     1   
3                 8               1        Child                     1   

   House_The Lannisters  Gender_Equal_To_Male  
1                     1                 False  
2                     0                  True  
4            

In [23]:
# Example 21: Creating a Categorical variable from a Series with custom labels
battles_categories = pd.Categorical(df['Battles_Fought'], labels=['Low', 'Medium', 'High'], ordered=True)
df['Battles_Category'] = battles_categories
print("\nExample 21:")
print(df)

# Example 22: Applying a function to elements of a Categorical variable
def battles_category_description(category):
    if category == 'Low':
        return 'Few Battles'
    elif category == 'Medium':
        return 'Moderate Battles'
    elif category == 'High':
        return 'Many Battles'

df['Battles_Category_Description'] = df['Battles_Category'].apply(battles_category_description)
print("\nExample 22:")
print(df)

TypeError: Categorical.__init__() got an unexpected keyword argument 'labels'

In [24]:

# Example 23: Changing the order of categories in a Categorical variable using sort_values()
df.sort_values(by='House', inplace=True)
print("\nExample 23:")
print(df)



Example 23:
                 Name           House    Status  Gender  Age  \
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   

  Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
1                12               5  Young Adult                     0   
2                15               2        Adult                     0   
4                 6               4       Senior                     0   
0                10               3        Adult                     1   
3                 8               1        Child                     1   

   House_The Lannisters  Gender_Equal_To_Male  
1                     1                 False  
2                     0                  True  
4            

In [25]:
# Example 24: Creating a Categorical variable from a Series with custom labels
battles_categories = pd.Categorical(df['Battles_Fought'], labels=['Low', 'Medium', 'High'], ordered=True)
df['Battles_Category'] = battles_categories
print("\nExample 24:")
print(df)

TypeError: Categorical.__init__() got an unexpected keyword argument 'labels'

In [26]:
# Example 25: Applying a function to elements of a Categorical variable
def battles_category_description(category):
    if category == 'Low':
        return 'Few Battles'
    elif category == 'Medium':
        return 'Moderate Battles'
    elif category == 'High':
        return 'Many Battles'

df['Battles_Category_Description'] = df['Battles_Category'].apply(battles_category_description)
print("\nExample 25:")
print(df)

KeyError: 'Battles_Category'

In [27]:
# Example 26: Using the Categorical variable in pivot tables
pivot_table = df.pivot_table(index='House', columns='Gender', values='Age', aggfunc='mean')
print("\nExample 26:")
print(pivot_table)


Example 26:
Gender          Male  Female
House                       
The Lannisters   NaN    30.0
The Starks      40.0    45.0
The Targaryens  35.0    18.0


In [28]:
# Example 27: Using Categorical variable for grouping and aggregating
age_group = df.groupby('Age_Category')['Age'].mean()
print("\nExample 27:")
print(age_group)


Example 27:
Age_Category
Child          18.0
Young Adult    30.0
Adult          37.5
Senior         45.0
Name: Age, dtype: float64


In [29]:
# Example 28: Combining multiple Categorical variables for grouping and aggregating
grouped_data = df.groupby(['House', 'Age_Category'])['Total_Appearances'].sum().reset_index()
print("\nExample 28:")
print(grouped_data)

TypeError: category type does not support sum operations

In [30]:
# Example 29: Creating a Categorical variable from a Series with custom categories
house_categories = pd.Categorical(df['House'], categories=['The Starks', 'The Targaryens', 'The Lannisters'])
df['House_Category'] = house_categories
print("\nExample 29:")
print(df)


Example 29:
                 Name           House    Status  Gender  Age  \
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   

  Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
1                12               5  Young Adult                     0   
2                15               2        Adult                     0   
4                 6               4       Senior                     0   
0                10               3        Adult                     1   
3                 8               1        Child                     1   

   House_The Lannisters  Gender_Equal_To_Male  House_Category  
1                     1                 False  The Lannisters  
2                     0      

In [31]:
# Example 30: Using Categorical variable for conditional filtering
female_starks = df[(df['House'] == 'The Starks') & (df['Gender'] == 'Female')]
print("\nExample 30:")
print(female_starks)


Example 30:
               Name       House    Status  Gender  Age Total_Appearances  \
4  Cersei Lannister  The Starks  Deceased  Female   45                 6   

   Battles_Fought Age_Category  House_The Targaryens  House_The Lannisters  \
4               4       Senior                     0                     0   

   Gender_Equal_To_Male House_Category  
4                 False     The Starks  


In [32]:
# Example 31: Setting the order of categories based on a custom list
custom_order = ['The Targaryens', 'The Starks', 'The Lannisters']
df['House'] = df['House'].cat.set_categories(custom_order)
print("\nExample 31:")
print(df)


Example 31:
                 Name           House    Status  Gender  Age  \
1  Daenerys Targaryen  The Lannisters  Deceased  Female   30   
2    Tyrion Lannister      The Starks     Alive    Male   40   
4    Cersei Lannister      The Starks  Deceased  Female   45   
0            Jon Snow  The Targaryens     Alive    Male   35   
3          Arya Stark  The Targaryens     Alive  Female   18   

  Total_Appearances  Battles_Fought Age_Category  House_The Targaryens  \
1                12               5  Young Adult                     0   
2                15               2        Adult                     0   
4                 6               4       Senior                     0   
0                10               3        Adult                     1   
3                 8               1        Child                     1   

   House_The Lannisters  Gender_Equal_To_Male  House_Category  
1                     1                 False  The Lannisters  
2                     0      

In [33]:
# Example 32: Changing the data type of a column to Categorical with memory optimization
df['Battles_Fought'] = df['Battles_Fought'].astype('category')
print("\nExample 32:")
print(df.dtypes)


Example 32:
Name                      object
House                   category
Status                    object
Gender                  category
Age                        int64
Total_Appearances       category
Battles_Fought          category
Age_Category            category
House_The Targaryens       uint8
House_The Lannisters       uint8
Gender_Equal_To_Male        bool
House_Category          category
dtype: object


In [34]:
# Example 33: Using Categorical variable for filtering based on a condition
medium_battles = df[df['Battles_Category'] == 'Medium']
print("\nExample 33:")
print(medium_battles)

KeyError: 'Battles_Category'

In [35]:
# Example 34: Applying a function element-wise to a Categorical variable and creating a new DataFrame
df['House_Code'] = df['House'].apply(lambda x: 1 if x == 'The Starks' else 0)
print("\nExample 34:")
print(df[['House', 'House_Code']])


Example 34:
            House  House_Code
1  The Lannisters           0
2      The Starks           1
4      The Starks           1
0  The Targaryens           0
3  The Targaryens           0


In [36]:
# Example 35: Creating a Categorical variable with custom categories and labels
age_categories = pd.Categorical(df['Age_Category'], categories=['Young Adult', 'Adult', 'Senior'], labels=[1, 2, 3])
df['Age_Category_Code'] = age_categories
print("\nExample 35:")
print(df[['Age_Category', 'Age_Category_Code']])

TypeError: Categorical.__init__() got an unexpected keyword argument 'labels'