# Mushroom Dataset

In [329]:
import tensorflow as tf
from tensorflow import keras

# Helper libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

2.2.0


Importiamo il dataset:

In [330]:
url = 'https://raw.githubusercontent.com/rirolli/Mushroom/master/mushroom_data_all.csv'
mushroom_data = pd.read_csv(url)
print(mushroom_data.columns)

Index(['class_edible', 'cap-shape', 'cap-surface', 'cap-color', 'bruises',
       'odor', 'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
       'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'stalk-color-above-ring',
       'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'spore-print-color', 'population', 'habitat'],
      dtype='object')


Preparazione dei dati:

In [0]:
# Entries with a '?' indicate a missing piece of data, and
# these entries are dropped from our dataset.
mushroom_data.replace('?', np.nan, inplace=True)
mushroom_data.dropna(inplace=True)

# The class of poisonous or edible is indicated in the data as
# either 'p' or 'e' respectively. We require that this is numeric,
# and therefore use '0' to indicate poisonous (or not edible) and
# '1' to indicate edible.
mushroom_data['class_edible'].replace('p', 0, inplace=True)
mushroom_data['class_edible'].replace('e', 1, inplace=True)

# Since we are dealing with non-numeric feature data, or in other
# words, categorical data, we need to replace these with numerical
# equivalents. Pandas has a nice function called "get_dummies" that
# performs this task.
cols_to_transform = mushroom_data.columns[1:]
mushroom_data = pd.get_dummies(mushroom_data, columns=cols_to_transform)

Split del dataset: 

In [0]:
y = mushroom_data.class_edible
X = mushroom_data.drop(labels=['class_edible'],axis=1)
num_inputs = X.shape[1]

train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=0)

Adesso criamo il modello:

In [333]:
model = keras.Sequential([
  tf.keras.layers.Dense(12, input_dim=num_inputs, activation='relu'),
  tf.keras.layers.Dense(22, activation='relu'),
  tf.keras.layers.Dense(1)
])

model.summary()

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

Model: "sequential_36"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_90 (Dense)             (None, 12)                1188      
_________________________________________________________________
dense_91 (Dense)             (None, 22)                286       
_________________________________________________________________
dense_92 (Dense)             (None, 1)                 23        
Total params: 1,497
Trainable params: 1,497
Non-trainable params: 0
_________________________________________________________________


Addestriamo la rete:

In [334]:
model.fit(train_X, train_y, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f68e7ef7198>

get predicted prices on validation data


In [335]:
# get predicted prices on validation data
test_loss, test_acc = model.evaluate(val_X, val_y)
print(f'\nTest accuracy: {test_acc}')

predictions = model.predict(val_X)


Test accuracy: 1.0


In [339]:
print(predictions)
print('-'*10,'\n',val_y)

[[-0.89452314]
 [ 1.7636974 ]
 [ 1.9748014 ]
 ...
 [ 2.2252913 ]
 [-1.0250244 ]
 [-0.8784342 ]]
---------- 
 5261    0
2343    1
2552    1
895     1
4799    1
       ..
5725    0
384     1
1721    1
4750    0
3976    0
Name: class_edible, Length: 1411, dtype: int64


In [340]:
print(confusion_matrix(val_y, predictions))

ValueError: ignored