In [1]:
import pandas as pd

df = pd.DataFrame({
    'Color': ['Red', 'Blue', 'Green', 'Blue', 'Red'],
    'Size': ['S', 'M', 'L', 'M', 'S'],
    'Price': [10.5, 12.0, 14.0, 13.0, 10.0]
})

print(df)


   Color Size  Price
0    Red    S   10.5
1   Blue    M   12.0
2  Green    L   14.0
3   Blue    M   13.0
4    Red    S   10.0


In [2]:
from sklearn.preprocessing import LabelEncoder

size_order = {'S': 0, 'M': 1, 'L': 2}
df['Size_Label'] = df['Size'].map(size_order)

print(df)


   Color Size  Price  Size_Label
0    Red    S   10.5           0
1   Blue    M   12.0           1
2  Green    L   14.0           2
3   Blue    M   13.0           1
4    Red    S   10.0           0


In [18]:
df_encoded = pd.get_dummies(df, columns=['Color'], drop_first=True)
print(df_encoded)


  Size  Price  Color_Green  Color_Red
0    S   10.5        False       True
1    M   12.0        False      False
2    L   14.0         True      False
3    M   13.0        False      False
4    S   10.0        False       True


In [22]:
from sklearn.preprocessing import OrdinalEncoder

ordinal_encoder = OrdinalEncoder(categories=[['S', 'M', 'L']])
df['Size_Ordinal'] = ordinal_encoder.fit_transform(df[['Size']])

print(df)


   Color Size  Price  Size_Ordinal
0    Red    S   10.5           0.0
1   Blue    M   12.0           1.0
2  Green    L   14.0           2.0
3   Blue    M   13.0           1.0
4    Red    S   10.0           0.0


In [2]:
import pandas as pd
data_csv=pd.read_csv(r"E:\Machine Learning\Machine-Learning\DataFiles\movie_data\genres.csv")

In [4]:
print(data_csv)

           id     genres
0         862  Animation
1         862     Comedy
2         862     Family
3        8844  Adventure
4        8844    Fantasy
...       ...        ...
93522   67758     Action
93523   67758      Drama
93524   67758   Thriller
93525  227506        NaN
93526  461257        NaN

[93527 rows x 2 columns]


In [5]:
print(data_csv.head())

     id     genres
0   862  Animation
1   862     Comedy
2   862     Family
3  8844  Adventure
4  8844    Fantasy


In [11]:
from sklearn.preprocessing import LabelEncoder
le={'Animation':0,'Comedy':1,'Drama':2,'Action':3,'Horror':4}
data_csv['Genre_Label']=data_csv['genres'].map(le)
print(data_csv.head())

     id     genres  Genre_Label
0   862  Animation          0.0
1   862     Comedy          1.0
2   862     Family          NaN
3  8844  Adventure          NaN
4  8844    Fantasy          NaN


In [20]:
data_csv_encoded = pd.get_dummies(data_csv, columns=['genres'], drop_first=True)
print(data_csv_encoded.head())

     id  Genre_Label  genres_Adventure  genres_Animation  genres_Comedy  \
0   862          0.0             False              True          False   
1   862          1.0             False             False           True   
2   862          NaN             False             False          False   
3  8844          NaN              True             False          False   
4  8844          NaN             False             False          False   

   genres_Crime  genres_Documentary  genres_Drama  genres_Family  \
0         False               False         False          False   
1         False               False         False          False   
2         False               False         False           True   
3         False               False         False          False   
4         False               False         False          False   

   genres_Fantasy  ...  genres_History  genres_Horror  genres_Music  \
0           False  ...           False          False         False  

In [7]:
import category_encoders as ce

df = pd.DataFrame({'City': ['Paris', 'London', 'Berlin', 'Tokyo', 'Paris']})
print(df)
binary_encoder = ce.BinaryEncoder(cols=['City'])
df_binary = binary_encoder.fit_transform(df)
print(df_binary)


     City
0   Paris
1  London
2  Berlin
3   Tokyo
4   Paris
   City_0  City_1  City_2
0       0       0       1
1       0       1       0
2       0       1       1
3       1       0       0
4       0       0       1


In [10]:
import pandas as pd

df = pd.DataFrame({
    'Color': ['Red', 'Blue', 'Green', 'Blue', 'Red'],
    'Size': ['S', 'M', 'L', 'M', 'S'],
    'Price': [10.5, 12.0, 14.0, 13.0, 10.0],
    'Bought': [1, 0, 1, 0, 1]  # target column for supervised encodings
})

print(df)


   Color Size  Price  Bought
0    Red    S   10.5       1
1   Blue    M   12.0       0
2  Green    L   14.0       1
3   Blue    M   13.0       0
4    Red    S   10.0       1


In [11]:
mean_target = df.groupby('Color')['Bought'].mean().to_dict()
df['Color_Target'] = df['Color'].map(mean_target)
print(df)


   Color Size  Price  Bought  Color_Target
0    Red    S   10.5       1           1.0
1   Blue    M   12.0       0           0.0
2  Green    L   14.0       1           1.0
3   Blue    M   13.0       0           0.0
4    Red    S   10.0       1           1.0


In [12]:
import category_encoders as ce
he = ce.HashingEncoder(cols=['Color'], n_components=4)
df_hash = he.fit_transform(df)
print(df_hash)


   col_0  col_1  col_2  col_3 Size  Price  Bought  Color_Target
0      0      0      0      1    S   10.5       1           1.0
1      0      1      0      0    M   12.0       0           0.0
2      0      1      0      0    L   14.0       1           1.0
3      0      1      0      0    M   13.0       0           0.0
4      0      0      0      1    S   10.0       1           1.0
