In [1]:
import pandas as pd

In [2]:
# Create a sample DataFrame with categorical data
data = {
    'Category': ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'C', 'B'],
    'Color': ['Red', 'Green', 'Blue', 'Red', 'Green', 'Red', 'Blue', 'Red', 'Green'],
    'Size': ['Small', 'Medium', 'Large', 'Small', 'Medium', 'Large', 'Medium', 'Small', 'Large'],
    'Label': [1, 0, 1, 0, 1, 0, 1, 1, 0]
}
df = pd.DataFrame(data)

In [3]:
# Example 1: One-Hot Encoding using pd.get_dummies
df_encoded = pd.get_dummies(df, columns=['Category', 'Color', 'Size'], prefix=['Cat', 'Col', 'Size'])


In [4]:
# Example 2: Label Encoding using pd.factorize
df['Size_Label'] = pd.factorize(df['Size'])[0]


In [5]:
# Example 3: Ordinal Encoding using a custom mapping
size_mapping = {'Small': 1, 'Medium': 2, 'Large': 3}
df['Size_Ordinal'] = df['Size'].map(size_mapping)


In [6]:
pip install category-encoders

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
# Example 4: Binary Encoding using category_encoders library
import category_encoders as ce
encoder = ce.BinaryEncoder(cols=['Size'])
df_binary_encoded = encoder.fit_transform(df)


In [8]:
# Example 5: Count Encoding using category_encoders
encoder = ce.CountEncoder(cols=['Category'])
df_count_encoded = encoder.fit_transform(df)

In [9]:
# Example 6: Target Encoding using category_encoders
encoder = ce.TargetEncoder(cols=['Color'])
df_target_encoded = encoder.fit_transform(df, df['Label'])

In [10]:
# Example 7: Mean Encoding using groupby and transform
mean_encoded = df.groupby('Color')['Label'].transform('mean')
df['Color_Mean_Encoded'] = mean_encoded

In [11]:
# Example 8: Frequency Encoding using value_counts
freq_encoding = df['Color'].map(df['Color'].value_counts())
df['Color_Freq_Encoded'] = freq_encoding

In [12]:
# Example 9: Hashing Encoding using category_encoders
encoder = ce.HashingEncoder(cols=['Category'], n_components=3)
df_hash_encoded = encoder.fit_transform(df)

In [13]:
# Example 10: Leave-One-Out Encoding using category_encoders
encoder = ce.LeaveOneOutEncoder(cols=['Color'])
df_loo_encoded = encoder.fit_transform(df, df['Label'])

In [14]:
# Display a sample of the DataFrame
print(df.head(3))

  Category  Color    Size  Label  Size_Label  Size_Ordinal  \
0        A    Red   Small      1           0             1   
1        B  Green  Medium      0           1             2   
2        A   Blue   Large      1           2             3   

   Color_Mean_Encoded  Color_Freq_Encoded  
0            0.500000                   4  
1            0.333333                   3  
2            1.000000                   2  


In [15]:
# Example 11: Weight of Evidence Encoding using category_encoders
encoder = ce.WOEEncoder(cols=['Category'])
df_woe_encoded = encoder.fit_transform(df, df['Label'])

In [16]:
# Example 12: James-Stein Encoding using category_encoders
encoder = ce.JamesSteinEncoder(cols=['Category'])
df_js_encoded = encoder.fit_transform(df, df['Label'])

In [17]:
# Example 13: Backward Difference Encoding using category_encoders
encoder = ce.BackwardDifferenceEncoder(cols=['Size'])
df_bd_encoded = encoder.fit_transform(df)



In [18]:
# Example 14: Helmert Encoding using category_encoders
encoder = ce.HelmertEncoder(cols=['Size'])
df_helmert_encoded = encoder.fit_transform(df)



In [19]:
# Example 15: Polynomial Encoding using category_encoders
encoder = ce.PolynomialEncoder(cols=['Size'])
df_poly_encoded = encoder.fit_transform(df)



In [20]:
# Example 16: Bin-counting Encoding using category_encoders
encoder = ce.CatBoostEncoder(cols=['Category'])
df_cb_encoded = encoder.fit_transform(df, df['Label'])

In [21]:
# Example 17: Feature Hashing using sklearn's FeatureHasher
from sklearn.feature_extraction import FeatureHasher
hasher = FeatureHasher(n_features=3, input_type='string')
hashed_features = hasher.transform(df['Size'])
df_hashed_features = pd.DataFrame(hashed_features.toarray(), columns=['Size_hashed_1', 'Size_hashed_2', 'Size_hashed_3'])


ValueError: Samples can not be a single string. The input must be an iterable over iterables of strings.

In [22]:
# Example 18: Encoding with Frequency and Mean for high cardinality categories
category_counts = df['Category'].value_counts()
category_means = df.groupby('Category')['Label'].mean()
df['Category_Freq_Encoded'] = df['Category'].map(category_counts)
df['Category_Mean_Encoded'] = df['Category'].map(category_means)


In [23]:
# Example 19: Encoding with Frequency and Mean for Color and Size
color_counts = df['Color'].value_counts()
color_means = df.groupby('Color')['Label'].mean()
size_counts = df['Size'].value_counts()
size_means = df.groupby('Size')['Label'].mean()
df['Color_Freq_Encoded'] = df['Color'].map(color_counts)
df['Color_Mean_Encoded'] = df['Color'].map(color_means)
df['Size_Freq_Encoded'] = df['Size'].map(size_counts)
df['Size_Mean_Encoded'] = df['Size'].map(size_means)


In [24]:
# Example 20: Encoding with Frequency and Mean for Size_Label
size_label_counts = df['Size_Label'].value_counts()
size_label_means = df.groupby('Size_Label')['Label'].mean()
df['Size_Label_Freq_Encoded'] = df['Size_Label'].map(size_label_counts)
df['Size_Label_Mean_Encoded'] = df['Size_Label'].map(size_label_means)


In [25]:
# Display a sample of the DataFrame
print(df.head(3))

  Category  Color    Size  Label  Size_Label  Size_Ordinal  \
0        A    Red   Small      1           0             1   
1        B  Green  Medium      0           1             2   
2        A   Blue   Large      1           2             3   

   Color_Mean_Encoded  Color_Freq_Encoded  Category_Freq_Encoded  \
0            0.500000                   4                      3   
1            0.333333                   3                      3   
2            1.000000                   2                      3   

   Category_Mean_Encoded  Size_Freq_Encoded  Size_Mean_Encoded  \
0               0.666667                  3           0.666667   
1               0.333333                  3           0.666667   
2               0.666667                  3           0.333333   

   Size_Label_Freq_Encoded  Size_Label_Mean_Encoded  
0                        3                 0.666667  
1                        3                 0.666667  
2                        3                 0.333333

In [26]:
# Example 21: Grouping and Encoding with Grouped Mean
grouped_means = df.groupby('Category')['Label'].transform('mean')
df['Category_Grouped_Mean_Encoded'] = grouped_means

In [27]:
# Example 22: Frequency Encoding with Noise
noise = 0.1
import numpy as np
df['Color_Freq_Noise_Encoded'] = df['Color_Freq_Encoded'] + noise * np.random.randn(len(df))


In [28]:
# Example 23: Handling Unknown Categories with Target Mean Encoding
unknown_category = 'D'
df_unknown_category = df.copy()
df_unknown_category.loc[6, 'Category'] = unknown_category
df_unknown_category['Category_Unknown_Mean_Encoded'] = df_unknown_category.groupby('Category')['Label'].transform('mean')


In [29]:
# Example 24: Handling Rare Categories with Frequency Encoding
threshold = 2
rare_categories = df['Size'].value_counts()[df['Size'].value_counts() < threshold].index
df['Size_Rare_Encoded'] = df['Size'].apply(lambda x: 'Rare' if x in rare_categories else x)
df['Size_Rare_Freq_Encoded'] = df['Size_Rare_Encoded'].map(df['Size_Rare_Encoded'].value_counts())


In [30]:
# Example 25: Mean Encoding with Cross-Validation
from sklearn.model_selection import KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)
df['Category_CV_Mean_Encoded'] = 0
for train_idx, val_idx in kf.split(df):
    mean_map = df.iloc[train_idx].groupby('Category')['Label'].mean()
    df.loc[val_idx, 'Category_CV_Mean_Encoded'] = df.loc[val_idx, 'Category'].map(mean_map)


In [31]:
# Example 26: Weight of Evidence Encoding with Cross-Validation
encoder = ce.WOEEncoder(cols=['Size_Label'])
df['Size_Label_CV_WOE_Encoded'] = 0
for train_idx, val_idx in kf.split(df):
    encoder.fit(df.iloc[train_idx], df.iloc[train_idx]['Label'])
    df.loc[val_idx, 'Size_Label_CV_WOE_Encoded'] = encoder.transform(df.iloc[val_idx])['Size_Label']


In [32]:
# Example 27: Handling Categorical Missing Data with Encoding
df_missing = df.copy()
df_missing.loc[1, 'Color'] = None
df_missing.loc[3, 'Color'] = None
df_missing['Color_Missing_Encoded'] = df_missing['Color'].fillna('Missing')


In [33]:
# Example 28: Custom Encoding for Ordinal Categories
ordinal_mapping = {'Small': 1, 'Medium': 2, 'Large': 3, 'Missing': 0}
df_missing['Size_Ordinal_Custom_Encoded'] = df_missing['Size_Missing_Encoded'].map(ordinal_mapping)


KeyError: 'Size_Missing_Encoded'

In [34]:
# Example 29: Count Encoding with Smoothing
smoothing = 2
df['Color_Count_Smooth_Encoded'] = (df['Color'].map(df['Color'].value_counts()) + smoothing) / (df['Color'].value_counts().sum() + smoothing)


In [36]:
# Example 30: Combining Multiple Encodings
df_combined = df.copy()
df_combined['Category_Freq_Size_Label_Mean_Encoded'] = df_combined['Category_Freq_Encoded'] * df_combined['Size_Label_Mean_Encoded']


In [37]:
# Display a sample of the DataFrame
print(df.head(3))

  Category  Color    Size  Label  Size_Label  Size_Ordinal  \
0        A    Red   Small      1           0             1   
1        B  Green  Medium      0           1             2   
2        A   Blue   Large      1           2             3   

   Color_Mean_Encoded  Color_Freq_Encoded  Category_Freq_Encoded  \
0            0.500000                   4                      3   
1            0.333333                   3                      3   
2            1.000000                   2                      3   

   Category_Mean_Encoded  ...  Size_Mean_Encoded  Size_Label_Freq_Encoded  \
0               0.666667  ...           0.666667                        3   
1               0.333333  ...           0.666667                        3   
2               0.666667  ...           0.333333                        3   

   Size_Label_Mean_Encoded  Category_Grouped_Mean_Encoded  \
0                 0.666667                       0.666667   
1                 0.666667                     

In [38]:
# Example 31: Frequency Encoding with Cross-Validation
df['Size_CV_Freq_Encoded'] = 0
for train_idx, val_idx in kf.split(df):
    freq_map = df.iloc[train_idx]['Size'].value_counts()
    df.loc[val_idx, 'Size_CV_Freq_Encoded'] = df.loc[val_idx, 'Size'].map(freq_map)


In [39]:
# Example 32: Mean Encoding with Feature Interaction
df['Category_Size_Label_Mean_Encoded'] = df.groupby(['Category', 'Size_Label'])['Label'].transform('mean')


In [40]:
# Example 33: Label Encoding with Order
order_mapping = {'Small': 1, 'Medium': 2, 'Large': 3}
df['Size_Label_Order_Encoded'] = df['Size_Label'].map(order_mapping)

In [41]:
# Example 34: Encoding with Lag Features
df_lag_encoded = df.copy()
df_lag_encoded['Label_Lag1'] = df_lag_encoded.groupby('Category')['Label'].shift(1)
df_lag_encoded['Label_Lag2'] = df_lag_encoded.groupby('Category')['Label'].shift(2)


In [42]:
# Example 35: Encoding with Rolling Mean
df_rolling_encoded = df.copy()
df_rolling_encoded['Rolling_Mean'] = df_rolling_encoded.groupby('Category')['Label'].rolling(window=2, min_periods=1).mean().reset_index(0, drop=True)


In [43]:
# Example 36: Encoding with Expanding Mean
df_expanding_encoded = df.copy()
df_expanding_encoded['Expanding_Mean'] = df_expanding_encoded.groupby('Category')['Label'].expanding(min_periods=1).mean().reset_index(0, drop=True)


In [44]:
# Example 37: Encoding with Moving Average
df_moving_avg_encoded = df.copy()
df_moving_avg_encoded['Moving_Avg'] = df_moving_avg_encoded.groupby('Category')['Label'].rolling(window=2, min_periods=1).mean().reset_index(0, drop=True)


In [45]:
# Example 38: Encoding with Exponential Moving Average
span = 2
alpha = 2 / (span + 1)
df_ema_encoded = df.copy()
df_ema_encoded['EMA'] = df_ema_encoded.groupby('Category')['Label'].ewm(span=span, adjust=False).mean().reset_index(0, drop=True)


In [46]:
# Example 39: Encoding with Shifted Rolling Mean
df_shifted_encoded = df.copy()
df_shifted_encoded['Shifted_Rolling_Mean'] = df_shifted_encoded.groupby('Category')['Label'].rolling(window=2, min_periods=1).mean().shift(1).reset_index(0, drop=True)


In [47]:
# Example 40: Encoding with Expanding Max
df_expanding_max_encoded = df.copy()
df_expanding_max_encoded['Expanding_Max'] = df_expanding_max_encoded.groupby('Category')['Label'].expanding(min_periods=1).max().reset_index(0, drop=True)


In [48]:
# Display a sample of the DataFrame
print(df.head(3))

  Category  Color    Size  Label  Size_Label  Size_Ordinal  \
0        A    Red   Small      1           0             1   
1        B  Green  Medium      0           1             2   
2        A   Blue   Large      1           2             3   

   Color_Mean_Encoded  Color_Freq_Encoded  Category_Freq_Encoded  \
0            0.500000                   4                      3   
1            0.333333                   3                      3   
2            1.000000                   2                      3   

   Category_Mean_Encoded  ...  Category_Grouped_Mean_Encoded  \
0               0.666667  ...                       0.666667   
1               0.333333  ...                       0.333333   
2               0.666667  ...                       0.666667   

   Color_Freq_Noise_Encoded  Size_Rare_Encoded  Size_Rare_Freq_Encoded  \
0                  3.871235              Small                       3   
1                  2.953926             Medium                       3   

In [49]:
# Example 41: Encoding with Shifted Expanding Max
df_shifted_expanding_max_encoded = df.copy()
df_shifted_expanding_max_encoded['Shifted_Expanding_Max'] = df_shifted_expanding_max_encoded.groupby('Category')['Label'].expanding(min_periods=1).max().shift(1).reset_index(0, drop=True)


In [50]:
# Example 42: Encoding with Cumulative Count
df_cumulative_count_encoded = df.copy()
df_cumulative_count_encoded['Cumulative_Count'] = df_cumulative_count_encoded.groupby('Category').cumcount()


In [51]:
# Example 43: Encoding with Rank
df_rank_encoded = df.copy()
df_rank_encoded['Rank'] = df_rank_encoded.groupby('Category')['Label'].rank()


In [52]:
# Example 44: Encoding with Group Mean and Size
df_group_encoded = df.copy()
df_group_encoded['Category_Size_Label_Group_Mean'] = df_group_encoded.groupby(['Category', 'Size_Label'])['Label'].transform('mean')
df_group_encoded['Category_Size_Group_Size'] = df_group_encoded.groupby(['Category', 'Size'])['Label'].transform('size')


In [53]:
# Example 45: Encoding with Group Cumulative Count
df_group_cumulative_count_encoded = df.copy()
df_group_cumulative_count_encoded['Category_Group_Cumulative_Count'] = df_group_cumulative_count_encoded.groupby('Category').cumcount()


In [54]:
# Example 46: Encoding with Group Rank
df_group_rank_encoded = df.copy()
df_group_rank_encoded['Category_Group_Rank'] = df_group_rank_encoded.groupby('Category')['Label'].rank()


In [55]:
# Example 47: Encoding with Group Expanding Max
df_group_expanding_max_encoded = df.copy()
df_group_expanding_max_encoded['Category_Group_Expanding_Max'] = df_group_expanding_max_encoded.groupby('Category')['Label'].expanding(min_periods=1).max().reset_index(0, drop=True)


In [56]:
# Example 48: Encoding with Group Shifted Expanding Max
df_group_shifted_expanding_max_encoded = df.copy()
df_group_shifted_expanding_max_encoded['Category_Group_Shifted_Expanding_Max'] = df_group_shifted_expanding_max_encoded.groupby('Category')['Label'].expanding(min_periods=1).max().shift(1).reset_index(0, drop=True)


In [57]:
# Example 49: Encoding with Group Cumulative Sum
df_group_cumulative_sum_encoded = df.copy()
df_group_cumulative_sum_encoded['Category_Group_Cumulative_Sum'] = df_group_cumulative_sum_encoded.groupby('Category')['Label'].cumsum()


In [58]:
# Example 50: Encoding with Group Z-Score
df_group_zscore_encoded = df.copy()
df_group_zscore_encoded['Category_Group_Z_Score'] = (df_group_zscore_encoded['Label'] - df_group_zscore_encoded.groupby('Category')['Label'].transform('mean')) / df_group_zscore_encoded.groupby('Category')['Label'].transform('std')


In [59]:
# Display a sample of the DataFrame
print(df.head(3))


  Category  Color    Size  Label  Size_Label  Size_Ordinal  \
0        A    Red   Small      1           0             1   
1        B  Green  Medium      0           1             2   
2        A   Blue   Large      1           2             3   

   Color_Mean_Encoded  Color_Freq_Encoded  Category_Freq_Encoded  \
0            0.500000                   4                      3   
1            0.333333                   3                      3   
2            1.000000                   2                      3   

   Category_Mean_Encoded  ...  Category_Grouped_Mean_Encoded  \
0               0.666667  ...                       0.666667   
1               0.333333  ...                       0.333333   
2               0.666667  ...                       0.666667   

   Color_Freq_Noise_Encoded  Size_Rare_Encoded  Size_Rare_Freq_Encoded  \
0                  3.871235              Small                       3   
1                  2.953926             Medium                       3   

In [60]:
# Example 51: Encoding with Group Shifted Cumulative Sum
df_group_shifted_cumulative_sum_encoded = df.copy()
df_group_shifted_cumulative_sum_encoded['Category_Group_Shifted_Cumulative_Sum'] = df_group_shifted_cumulative_sum_encoded.groupby('Category')['Label'].cumsum().shift(1)


In [61]:
# Example 52: Encoding with Group Min-Max Scaling
df_group_minmax_encoded = df.copy()
df_group_minmax_encoded['Category_Group_MinMax'] = (df_group_minmax_encoded['Label'] - df_group_minmax_encoded.groupby('Category')['Label'].transform('min')) / (df_group_minmax_encoded.groupby('Category')['Label'].transform('max') - df_group_minmax_encoded.groupby('Category')['Label'].transform('min'))


In [62]:
# Example 53: Encoding with Group Standardization
df_group_standardized_encoded = df.copy()
df_group_standardized_encoded['Category_Group_Standardized'] = (df_group_standardized_encoded['Label'] - df_group_standardized_encoded.groupby('Category')['Label'].transform('mean')) / df_group_standardized_encoded.groupby('Category')['Label'].transform('std')


In [63]:
# Example 54: Encoding with Group Robust Scaling
df_group_robust_encoded = df.copy()
df_group_robust_encoded['Category_Group_Robust'] = (df_group_robust_encoded['Label'] - df_group_robust_encoded.groupby('Category')['Label'].transform('median')) / (df_group_robust_encoded.groupby('Category')['Label'].transform('quantile', 0.75) - df_group_robust_encoded.groupby('Category')['Label'].transform('quantile', 0.25))


In [64]:
# Example 55: Encoding with Group Min-Max Scaling and Shifted Mean
df_group_minmax_shifted_encoded = df.copy()
df_group_minmax_shifted_encoded['Category_Group_MinMax_Shifted'] = (df_group_minmax_shifted_encoded['Label'] - df_group_minmax_shifted_encoded.groupby('Category')['Label'].transform('min')) / (df_group_minmax_shifted_encoded.groupby('Category')['Label'].transform('max') - df_group_minmax_shifted_encoded.groupby('Category')['Label'].transform('min')) + df_group_minmax_shifted_encoded.groupby('Category')['Label'].transform('mean')


In [65]:
# Example 56: Encoding with Group Z-Score and Shifted Median
df_group_zscore_shifted_encoded = df.copy()
df_group_zscore_shifted_encoded['Category_Group_Z_Score_Shifted'] = (df_group_zscore_shifted_encoded['Label'] - df_group_zscore_shifted_encoded.groupby('Category')['Label'].transform('mean')) / df_group_zscore_shifted_encoded.groupby('Category')['Label'].transform('std') + df_group_zscore_shifted_encoded.groupby('Category')['Label'].transform('median')


In [66]:
# Example 57: Encoding with Group Rank and Normalization
df_group_rank_normalized_encoded = df.copy()
df_group_rank_normalized_encoded['Category_Group_Rank_Normalized'] = (df_group_rank_normalized_encoded.groupby('Category')['Label'].rank() - 1) / (df_group_rank_normalized_encoded.groupby('Category')['Label'].transform('size') - 1)


In [67]:
# Example 58: Encoding with Group Min-Max Scaling and Group Mean
df_group_minmax_group_mean_encoded = df.copy()
df_group_minmax_group_mean_encoded['Category_Group_MinMax_Group_Mean'] = (df_group_minmax_group_mean_encoded['Label'] - df_group_minmax_group_mean_encoded.groupby('Category')['Label'].transform('min')) / (df_group_minmax_group_mean_encoded.groupby('Category')['Label'].transform('max') - df_group_minmax_group_mean_encoded.groupby('Category')['Label'].transform('min')) * df_group_minmax_group_mean_encoded.groupby('Category')['Label'].transform('mean')


In [68]:
# Example 59: Encoding with Group Z-Score and Group Mean
df_group_zscore_group_mean_encoded = df.copy()
df_group_zscore_group_mean_encoded['Category_Group_Z_Score_Group_Mean'] = (df_group_zscore_group_mean_encoded['Label'] - df_group_zscore_group_mean_encoded.groupby('Category')['Label'].transform('mean')) / df_group_zscore_group_mean_encoded.groupby('Category')['Label'].transform('std') * df_group_zscore_group_mean_encoded.groupby('Category')['Label'].transform('mean')


In [69]:
# Example 60: Encoding with Group Rank and Group Standard Deviation
df_group_rank_group_std_encoded = df.copy()
df_group_rank_group_std_encoded['Category_Group_Rank_Group_Std'] = df_group_rank_group_std_encoded.groupby('Category')['Label'].rank() / df_group_rank_group_std_encoded.groupby('Category')['Label'].transform('std')


In [70]:
# Display a sample of the DataFrame
print(df.head(3))

  Category  Color    Size  Label  Size_Label  Size_Ordinal  \
0        A    Red   Small      1           0             1   
1        B  Green  Medium      0           1             2   
2        A   Blue   Large      1           2             3   

   Color_Mean_Encoded  Color_Freq_Encoded  Category_Freq_Encoded  \
0            0.500000                   4                      3   
1            0.333333                   3                      3   
2            1.000000                   2                      3   

   Category_Mean_Encoded  ...  Category_Grouped_Mean_Encoded  \
0               0.666667  ...                       0.666667   
1               0.333333  ...                       0.333333   
2               0.666667  ...                       0.666667   

   Color_Freq_Noise_Encoded  Size_Rare_Encoded  Size_Rare_Freq_Encoded  \
0                  3.871235              Small                       3   
1                  2.953926             Medium                       3   

In [71]:
# Example 61: Encoding with Group Min-Max Scaling and Group Median
df_group_minmax_group_median_encoded = df.copy()
df_group_minmax_group_median_encoded['Category_Group_MinMax_Group_Median'] = (df_group_minmax_group_median_encoded['Label'] - df_group_minmax_group_median_encoded.groupby('Category')['Label'].transform('min')) / (df_group_minmax_group_median_encoded.groupby('Category')['Label'].transform('max') - df_group_minmax_group_median_encoded.groupby('Category')['Label'].transform('min')) * df_group_minmax_group_median_encoded.groupby('Category')['Label'].transform('median')


In [72]:
# Example 62: Encoding with Group Z-Score and Group Median
df_group_zscore_group_median_encoded = df.copy()
df_group_zscore_group_median_encoded['Category_Group_Z_Score_Group_Median'] = (df_group_zscore_group_median_encoded['Label'] - df_group_zscore_group_median_encoded.groupby('Category')['Label'].transform('mean')) / df_group_zscore_group_median_encoded.groupby('Category')['Label'].transform('std') * df_group_zscore_group_median_encoded.groupby('Category')['Label'].transform('median')


In [73]:
# Example 63: Encoding with Group Rank and Group Min-Max Scaling
df_group_rank_group_minmax_encoded = df.copy()
df_group_rank_group_minmax_encoded['Category_Group_Rank_Group_MinMax'] = df_group_rank_group_minmax_encoded.groupby('Category')['Label'].rank() / (df_group_rank_group_minmax_encoded.groupby('Category')['Label'].transform('size') - 1)


In [74]:
# Example 64: Encoding with Group Rank and Group Max
df_group_rank_group_max_encoded = df.copy()
df_group_rank_group_max_encoded['Category_Group_Rank_Group_Max'] = df_group_rank_group_max_encoded.groupby('Category')['Label'].rank() / df_group_rank_group_max_encoded.groupby('Category')['Label'].transform('max')


In [75]:
# Example 65: Encoding with Group Rank and Group Mean-Median Difference
df_group_rank_group_mean_median_diff_encoded = df.copy()
df_group_rank_group_mean_median_diff_encoded['Category_Group_Rank_Group_Mean_Median_Diff'] = df_group_rank_group_mean_median_diff_encoded.groupby('Category')['Label'].rank() - df_group_rank_group_mean_median_diff_encoded.groupby('Category')['Label'].transform('median')
