# Desafio Extra - OneHotEncoding

___

# Imports 

In [1]:
import numpy as np
import pandas as pd

# Objetivo:

Implementar o **OneHotEncoding**, método muito utilizado em Machine Learning para codificar dados categóricos na forma em que algoritmos de aprendizado de máquina compreendam.

Exemplo:

```
original = pd.Series([
    "classe_1",
    "classe_1",
    "classe_2",
    "classe_2",
    "classe_1",
    "classe_2",
])

# --- 

encoded = pd.DataFrame(
    columns=["classe_1", "classe_2"],
    data=[
        [1, 0],
        [1, 0],
        [0, 1],
        [0, 1],
        [1, 0],
        [0, 1]
    ]
    
)

```


# Série Original:


In [2]:
classes = ["Leite", "Ovos", "Carne", "Arroz", "Feijão"]
labels = pd.Series(np.random.choice(classes, 100))

# Dataset Codificado:

##### Solução 1:

In [3]:
df = pd.DataFrame(
    index=labels.index,
    columns=labels.unique(),
    data=0.
)
for k in classes:
    df.loc[labels == k, k] = 1.
df

Unnamed: 0,Feijão,Leite,Arroz,Ovos,Carne
0,1.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,1.0,0.0
6,1.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,1.0,0.0
8,0.0,1.0,0.0,0.0,0.0
9,1.0,0.0,0.0,0.0,0.0


##### Solução 2:

In [4]:
df = pd.DataFrame(index=labels.index)

for k in classes:
    df[k] = (labels == k).astype(float)
df

Unnamed: 0,Leite,Ovos,Carne,Arroz,Feijão
0,0.0,0.0,0.0,0.0,1.0
1,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,1.0
5,0.0,1.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,1.0
7,0.0,1.0,0.0,0.0,0.0
8,1.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,1.0
