# Conversão de atributos categóricos para numéricos

In [15]:
import pandas as pd
import numpy as np

## Dataset 
https://archive.ics.uci.edu/ml/datasets/Car+Evaluation

### class values

unacc, acc, good, vgood

### Attributes

- buying:   vhigh, high, med, low.
- maint:    vhigh, high, med, low.
- doors:    2, 3, 4, 5more.
- persons:  2, 4, more.
- lug_boot: small, med, big.
- safety:   low, med, high.

In [12]:
# The raw file has no header row, so the column labels are supplied at read time.
df = pd.read_csv(
    'datasets/car.csv',
    header=None,
    names=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class'],
)

In [13]:
# Preview the first rows to check that the column labels line up with the data.
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [18]:
# Inspect the dtype of each column before any conversion is applied.
df.dtypes

buying      object
maint       object
doors       object
persons     object
lug_boot    object
safety      object
class       object
dtype: object

In [19]:
# Per-column summary of the raw frame.
df.describe()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
count,1728,1728,1728,1728,1728,1728,1728
unique,4,4,4,3,3,3,4
top,vhigh,vhigh,3,more,big,low,unacc
freq,432,432,432,576,576,576,1210


In [36]:
# Distinct labels of a single column; np.unique returns them in sorted order.
np.unique(df['buying'])

array(['high', 'low', 'med', 'vhigh'], dtype=object)

In [43]:
# List the distinct labels found in every column, one column per line.
for column in df.columns:
    labels = np.unique(df[column])
    print('{}: {}'.format(column, labels))

buying: ['high' 'low' 'med' 'vhigh']
maint: ['high' 'low' 'med' 'vhigh']
doors: ['2' '3' '4' '5more']
persons: ['2' '4' 'more']
lug_boot: ['big' 'med' 'small']
safety: ['high' 'low' 'med']
class: ['acc' 'good' 'unacc' 'vgood']


In [40]:
def _is_numeric_dtype(dtype):
    """Return True when ``dtype`` is numeric, i.e. the column needs no mapping."""
    try:
        return np.issubdtype(dtype, np.number)
    except TypeError:
        # NumPy cannot interpret pandas extension dtypes (category, string,
        # nullable integers, ...); let pandas classify those instead.
        return pd.api.types.is_numeric_dtype(dtype)


def cat2numeric_map(df):
    """Build a label -> integer code mapping for every non-numeric column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose non-numeric columns should be encoded.

    Returns
    -------
    dict
        ``{column: {label: code}}`` for each non-numeric column, in the
        nested form accepted by ``DataFrame.replace``. Codes are
        ``0..n-1`` assigned in the sorted order produced by ``np.unique``,
        so they do not reflect any semantic ordering of the labels.
        Numeric columns are omitted. Missing values get no code and are
        therefore left untouched by a later ``replace``.
    """
    map_replace = {}
    for column in df:
        if _is_numeric_dtype(df[column].dtype):
            continue
        # Drop missing values first: np.unique sorts its input and cannot
        # compare NaN/None against strings in an object column.
        elements = np.unique(df[column].dropna())
        map_replace[column] = {element: code for code, element in enumerate(elements)}
    return map_replace

In [44]:
# Automatic mapping: codes follow the sorted order of the labels in each column,
# not their semantic order (compare with the dataset description).
map_replace = cat2numeric_map(df)
map_replace

{'buying': {'high': 0, 'low': 1, 'med': 2, 'vhigh': 3},
 'class': {'acc': 0, 'good': 1, 'unacc': 2, 'vgood': 3},
 'doors': {'2': 0, '3': 1, '4': 2, '5more': 3},
 'lug_boot': {'big': 0, 'med': 1, 'small': 2},
 'maint': {'high': 0, 'low': 1, 'med': 2, 'vhigh': 3},
 'persons': {'2': 0, '4': 1, 'more': 2},
 'safety': {'high': 0, 'low': 1, 'med': 2}}

In [45]:
# Hand-written mapping that replaces the automatic one so that the integer
# codes follow the natural order of each attribute's labels.
# 'doors' and 'persons' keep their literal counts; the open-ended labels
# '5more' and 'more' are both encoded as 5.
# NOTE(review): 'class' encodes acc as 0 and unacc as 1, while the dataset
# description lists the classes as unacc, acc, good, vgood -- confirm whether
# this swap is intended before relying on the class codes being ordinal.
map_replace = {
    'buying': {'low': 0, 'med': 1, 'high': 2, 'vhigh': 3},
    'maint': {'low': 0, 'med': 1, 'high': 2, 'vhigh': 3},
    'doors': {'2': 2, '3': 3, '4': 4, '5more': 5},
    'persons': {'2': 2, '4': 4, 'more': 5},
    'lug_boot': {'small': 0, 'med': 1, 'big': 2},
    'safety': {'low': 0, 'med': 1, 'high': 2},
    'class': {'acc': 0, 'unacc': 1, 'good': 2, 'vgood': 3},
}

In [46]:
# Swap every label for its integer code using the nested {column: {label: code}} map.
# Recent pandas versions no longer downcast the object columns that replace()
# returns, so infer_objects() is called explicitly to obtain integer dtypes.
df_conv = df.replace(map_replace).infer_objects()
df_conv.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,3,3,2,2,0,0,1
1,3,3,2,2,0,1,1
2,3,3,2,2,0,2,1
3,3,3,2,2,1,0,1
4,3,3,2,2,1,1,1


In [47]:
# Check the resulting dtypes after the labels were replaced by integer codes.
df_conv.dtypes

buying      int64
maint       int64
doors       int64
persons     int64
lug_boot    int64
safety      int64
class       int64
dtype: object