In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Agaricus-Lepiota dataset; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv('agaricus-lepiota.data')

In [3]:
# Preview the first rows to check how the columns were parsed.
df.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [4]:
# Number of distinct categorical values found in each column.
df.nunique()

class                        2
cap-shape                    6
cap-surface                  4
cap-color                   10
bruises                      2
odor                         9
gill-attachment              2
gill-spacing                 2
gill-size                    2
gill-color                  12
stalk-shape                  2
stalk-root                   5
stalk-surface-above-ring     4
stalk-surface-below-ring     4
stalk-color-above-ring       9
stalk-color-below-ring       9
veil-type                    1
veil-color                   4
ring-number                  3
ring-type                    5
spore-print-color            9
population                   6
habitat                      7
dtype: int64

In [5]:
# Remove every sample with a missing feature: '?' entries are first converted
# to NaN so that dropna() can discard the rows that contain them.
df = df.replace('?', np.nan).dropna()
df.shape

(5644, 23)

In [6]:
# Remove the 'veil-type' column: every value in it is 'p', so it carries no
# information for the classifier.
df = df.drop(columns=['veil-type'])

In [7]:
# List the distinct target labels present in the 'class' column.
df['class'].unique()

array(['p', 'e'], dtype=object)

In [8]:
# Separate the target column from the feature columns.
y = df['class']
X = df.drop(columns=['class'])

In [9]:
# Preview the feature matrix (the 'class' column is no longer part of it).
X.head()

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,x,s,n,t,p,f,c,n,k,e,...,s,s,w,w,w,o,p,k,s,u
1,x,s,y,t,a,f,c,b,k,e,...,s,s,w,w,w,o,p,n,n,g
2,b,s,w,t,l,f,c,b,n,e,...,s,s,w,w,w,o,p,n,n,m
3,x,y,w,t,p,f,c,n,n,e,...,s,s,w,w,w,o,p,k,s,u
4,x,s,g,f,n,f,w,b,k,t,...,s,s,w,w,w,o,e,n,a,g


In [10]:
# Preview the target labels.
y.head()

0    p
1    e
2    e
3    p
4    e
Name: class, dtype: object

In [11]:
# Count the distinct 'cap-color' values left in X (shown as a 1-tuple shape).
X['cap-color'].unique().shape

(8,)

### Pré-processamento dos dados:

In [12]:
from sklearn.preprocessing import LabelEncoder

In [13]:
# Encoder for the target column.
le_y = LabelEncoder()

# One independent encoder per feature column of X, stored in column order so
# that encoders_x[k] belongs to X.columns[k].
encoders_x = [LabelEncoder() for _ in X.columns]

In [14]:
# Every column is categorical, so each one is label-encoded with its own encoder.
# The encoders are always fitted on the untouched string values kept in df, not
# on X (which this cell overwrites with integers). Re-running the cell therefore
# never refits an encoder on already-encoded integers, which would make
# inverse_transform return numbers instead of the original letters.
# Iterating over X.columns also removes the assumption that 'class' is the
# first column of df.
y = le_y.fit_transform(df['class'])
for encoder, column_name in zip(encoders_x, X.columns):
    X[column_name] = encoder.fit_transform(df[column_name])

In [15]:
# Sanity check: decoding the encoded target gives back the original labels.
le_y.inverse_transform(y)

array(['p', 'e', 'e', ..., 'e', 'p', 'p'], dtype=object)

In [19]:
# Example: encoders_x[0] stores the encoder for column 0 of X (cap-shape), so it
# can map the encoded integers back to the original category letters.
encoders_x[0].inverse_transform(X['cap-shape'])

array(['x', 'x', 'b', ..., 'x', 'x', 'f'], dtype=object)

In [20]:
# Preview the feature matrix after label encoding (all values are now integers).
X.head()

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,5,2,4,1,6,1,0,1,2,0,...,2,2,5,5,0,1,3,1,3,5
1,5,2,7,1,0,1,0,0,2,0,...,2,2,5,5,0,1,3,2,2,1
2,0,2,6,1,3,1,0,0,3,0,...,2,2,5,5,0,1,3,2,2,3
3,5,3,6,1,6,1,0,1,3,0,...,2,2,5,5,0,1,3,1,3,5
4,5,2,3,0,5,1,1,0,2,1,...,2,2,5,5,0,1,0,2,0,1


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing.
# The encoded target lives in `y`; `y_enc` is never defined in this notebook,
# so referring to it raised a NameError.
# random_state fixes the shuffle so the split is reproducible across runs, and
# stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
# Fully connected classifier with three hidden ReLU layers.
# The input width and the number of output units are derived from the data
# instead of being hard-coded: a fixed 4-feature input and 3-unit output do not
# match this dataset, which has one input per column of X and one output per
# class known to le_y.
n_features = X.shape[1]
n_classes = len(le_y.classes_)

model = keras.Sequential([
    keras.layers.Dense(12, input_shape=(n_features,), activation='relu', name='oculta1'),
    keras.layers.Dense(8, activation='relu', name='oculta2'),
    keras.layers.Dense(10, activation='relu', name='oculta3'),
    # One softmax unit per class.
    keras.layers.Dense(n_classes, activation='softmax', name='saida')
])
model.summary()

In [None]:
from tensorflow.keras.optimizers import Adam

In [None]:
# The targets are integer class ids produced by LabelEncoder (le_y), not
# one-hot vectors, so the sparse variant of categorical cross-entropy is
# required; plain 'categorical_crossentropy' expects one-hot encoded targets.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the network on the training partition for 200 epochs.
model.fit(X_train,y_train, epochs = 200)

In [None]:
# Measure loss and accuracy on the held-out test partition.
model.evaluate(X_test,y_test)