[View in Colaboratory](https://colab.research.google.com/github/sabumjung/DL-with-TensorFlow/blob/DL-with-TensorFlow/1categorical.ipynb)

In [26]:
from __future__ import print_function

import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction import DictVectorizer, FeatureHasher
from sklearn.preprocessing import LabelEncoder, LabelBinarizer, OneHotEncoder


# 랜덤넘버 시드값 설정(재현성 목적)
np.random.seed(1000)

if __name__ == '__main__':
    print('Y[0]')
    Y = np.random.choice(('Male', 'Female'), size=(10))
    print(Y[0])

    # 레이블 인코드(Encode the labels)
    print('\nLabel encoding')
    le = LabelEncoder()
    yt = le.fit_transform(Y)
    print(yt)  
    print(le.classes_)
    

    # 더미출력 디코드(Decode a dummy output)
    print('\nLabel decoding')
    output = [1, 0, 1, 1, 0, 0]
    decoded_output = [le.classes_[i] for i in output]
    print(decoded_output)

    # 레이블 이진화(Binarize the labels)
    print('\nLabel binarization')
    lb = LabelBinarizer()
    yb = lb.fit_transform(Y)
    print(yb)

    # 이진화 레이블 디코드(Decode the binarized labels)
    print('\nLabel decoding')
    print(lb.inverse_transform(yb))

    # 딕셔너리 데이터 정의(Define some dictionary data)
    data = [
        {'feature_1': 10, 'feature_2': 15},
        {'feature_1': -5, 'feature_3': 22},
        {'feature_3': -2, 'feature_4': 10}
    ]

    # 딕셔너리 데이터 벡터화(Vectorize the dictionary data)
    print('\nDictionary data vectorization')
    dv = DictVectorizer()
    Y_dict = dv.fit_transform(data)
    print(Y_dict.todense())

    print('\nVocabulary:')
    print(dv.vocabulary_)

    # 피처해싱(Feature hashing)
    print('\nFeature hashing')
    fh = FeatureHasher()
    Y_hashed = fh.fit_transform(data)

    # 피처디코드(Decode the features)
    print('\nFeature decoding')
    print(Y_hashed.todense())

    # 원-핫 인코딩(One-hot encoding)
    data1 = [
        [0, 10],
        [1, 11],
        [1, 8],
        [0, 12],
        [0, 15]
    ]

    # 데이터 인코드(Encode data)
    oh = OneHotEncoder(categorical_features=[0])
    Y_oh = oh.fit_transform(data1)
    print(Y_oh.todense())

Y[0]
Female

Label encoding
[0 0 0 1 0 1 1 0 0 1]
['Female' 'Male']

Label decoding
['Male', 'Female', 'Male', 'Male', 'Female', 'Female']

Label binarization
[[0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]]

Label decoding
['Female' 'Female' 'Female' 'Male' 'Female' 'Male' 'Male' 'Female'
 'Female' 'Male']

Dictionary data vectorization
[[10. 15.  0.  0.]
 [-5.  0. 22.  0.]
 [ 0.  0. -2. 10.]]

Vocabulary:
{'feature_2': 1, 'feature_3': 2, 'feature_1': 0, 'feature_4': 3}

Feature hashing

Feature decoding
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[ 1.  0. 10.]
 [ 0.  1. 11.]
 [ 0.  1.  8.]
 [ 1.  0. 12.]
 [ 1.  0. 15.]]
