## One-Hot Encoding

In [8]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Sample data: a single categorical column of favourite colours
data = pd.DataFrame(
    {'Warna Kesukaan': ['Biru', 'Merah', 'Hijau', 'Biru', 'Hijau', 'Hijau', 'Biru']}
)

# One-hot encoder with an explicit category list; the list order determines
# the order of the generated columns (Biru, Merah, Hijau)
encoder = OneHotEncoder(categories=[['Biru', 'Merah', 'Hijau']])

# Fit on the colour column and encode it in a single step
one_hot_encoded = encoder.fit_transform(data[['Warna Kesukaan']])

# Names of the generated indicator columns, prefixed with the source column name
feature_names = encoder.get_feature_names_out(['Warna Kesukaan'])

# Convert the encoded matrix to a dense, labelled DataFrame and place it
# next to the original column
result = pd.concat(
    [data, pd.DataFrame(one_hot_encoded.toarray(), columns=feature_names)],
    axis=1,
)

print(result)


  Warna Kesukaan  Warna Kesukaan_Biru  Warna Kesukaan_Merah  \
0           Biru                  1.0                   0.0   
1          Merah                  0.0                   1.0   
2          Hijau                  0.0                   0.0   
3           Biru                  1.0                   0.0   
4          Hijau                  0.0                   0.0   
5          Hijau                  0.0                   0.0   
6           Biru                  1.0                   0.0   

   Warna Kesukaan_Hijau  
0                   0.0  
1                   0.0  
2                   1.0  
3                   0.0  
4                   1.0  
5                   1.0  
6                   0.0  


## Label Encoding

In [9]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Sample data: a single categorical column of favourite colours
data = pd.DataFrame(
    {'Warna Kesukaan': ['Biru', 'Merah', 'Hijau', 'Biru', 'Hijau', 'Hijau', 'Biru']}
)

# Label encoder: maps every distinct label to an integer code
encoder = LabelEncoder()

# Fit on the colour column and encode it in a single step
# (in the output below: Biru -> 0, Hijau -> 1, Merah -> 2)
label_encoded = encoder.fit_transform(data['Warna Kesukaan'])

# Wrap the integer codes in a one-column DataFrame
result = pd.DataFrame({'Warna Kesukaan Encoded': label_encoded})

print(result)


   Warna Kesukaan Encoded
0                       0
1                       2
2                       1
3                       0
4                       1
5                       1
6                       0


## Target Encoding

In [16]:
import pandas as pd
from category_encoders import TargetEncoder

# Sample data: a categorical feature plus a binary target
data = pd.DataFrame(
    {
        'Warna Kesukaan': ['Biru', 'Merah', 'Hijau', 'Biru', 'Hijau', 'Hijau', 'Biru'],
        'Target': [1, 0, 1, 0, 1, 0, 0],
    }
)

# Target encoder with its default settings
encoder = TargetEncoder()

# Fit on (feature, target) and replace each colour with its target-based value
target_encoded = encoder.fit_transform(data['Warna Kesukaan'], data['Target'])

# Swap the raw colour column for its encoded version, keeping the target column
data_encoded = pd.concat(
    [data.drop(columns=['Warna Kesukaan']), target_encoded],
    axis=1,
)

print(data_encoded)


   Target  Warna Kesukaan
0       1        0.413860
1       0        0.372811
2       1        0.465349
3       0        0.413860
4       1        0.465349
5       0        0.465349
6       0        0.413860


## Bayesian Mean Target Encoding

In [20]:
import pandas as pd
from category_encoders import CatBoostEncoder

# Sample data: a categorical feature plus a binary target
data = pd.DataFrame(
    {
        'Warna Kesukaan': ['Biru', 'Merah', 'Hijau', 'Biru', 'Hijau', 'Hijau', 'Biru'],
        'Target': [1, 0, 1, 0, 1, 0, 0],
    }
)

# CatBoost encoder with its default settings
encoder = CatBoostEncoder()

# Fit on (feature, target) and encode the training rows.
# Note: in the output below, rows sharing a colour receive different values.
target_encoded = encoder.fit_transform(data['Warna Kesukaan'], data['Target'])

# Swap the raw colour column for its encoded version, keeping the target column
data_encoded = pd.concat(
    [data.drop(columns=['Warna Kesukaan']), target_encoded],
    axis=1,
)

print(data_encoded)


   Target  Warna Kesukaan
0       1        0.428571
1       0        0.428571
2       1        0.428571
3       0        0.714286
4       1        0.714286
5       0        0.809524
6       0        0.476190


## K-Fold Target Encoding

In [24]:
import pandas as pd
from sklearn.model_selection import KFold
from category_encoders import TargetEncoder

# Sample data: a categorical feature plus a binary target
data = pd.DataFrame({'Warna Kesukaan': ['Biru', 'Merah', 'Hijau', 'Biru', 'Hijau', 'Hijau', 'Biru'],
                     'Target': [1, 0, 1, 0, 1, 0, 0]})

# K-fold splitter with two folds (no shuffling is requested)
kf = KFold(n_splits=2)

# Target encoder; it is refit from scratch on every fold's training rows
encoder = TargetEncoder()

# Per-fold encoding: fit on the training rows only, then apply to the held-out rows
for i, (train_index, test_index) in enumerate(kf.split(data)):
    # kf.split yields positional row indices, so select with .iloc rather than
    # label-based .loc (which is only correct for a default RangeIndex)
    train_fold = data.iloc[train_index]
    test_fold = data.iloc[test_index]

    # Separate features from the target without mutating the fold slices
    X_train = train_fold.drop(columns='Target')
    y_train = train_fold['Target']
    X_test = test_fold.drop(columns='Target')  # the test features must not contain the target

    X_train_encoded = encoder.fit_transform(X_train, y_train)
    X_test_encoded = encoder.transform(X_test)

    # Train and evaluate a model for this fold here

    # Show the encoded training and test rows of this fold
    print(f'Fold {i+1}:')
    print(X_train_encoded)
    print(X_test_encoded)
    print('\n')


Fold 1:
   Warna Kesukaan
4        0.356975
5        0.356975
6        0.289964
   Warna Kesukaan
0        0.289964
1        0.333333
2        0.356975
3        0.289964


Fold 2:
   Warna Kesukaan
0        0.500000
1        0.434946
2        0.565054
3        0.500000
   Warna Kesukaan
4        0.565054
5        0.565054
6        0.500000




## Leave-One-Out Target Encoding

In [18]:
!pip install category_encoders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [27]:
import pandas as pd
from category_encoders import LeaveOneOutEncoder

# Sample data: a categorical feature plus a binary target
data = pd.DataFrame(
    {
        'Warna Kesukaan': ['Biru', 'Merah', 'Hijau', 'Biru', 'Hijau', 'Hijau', 'Biru'],
        'Target': [1, 0, 1, 0, 1, 0, 0],
    }
)

# Leave-one-out encoder with its default settings
encoder = LeaveOneOutEncoder()

# Fit on (feature, target) and encode the training rows
leave_one_out_encoded = encoder.fit_transform(data['Warna Kesukaan'], data['Target'])

# Swap the raw colour column for its encoded version, keeping the target column
result = pd.concat(
    [data.drop(columns=['Warna Kesukaan']), leave_one_out_encoded],
    axis=1,
)

print(result)


   Target  Warna Kesukaan
0       1        0.000000
1       0        0.428571
2       1        0.500000
3       0        0.500000
4       1        0.500000
5       0        1.000000
6       0        0.500000
