# mushroom-classification

In this notebook we use an artificial neural network to classify the mushrooms in this dataset.
We also use other machine learning tools to prepare and explore the data.

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Suppress all Python warnings (not errors) so they do not clutter the notebook output.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the mushroom dataset from its CSV file into a DataFrame.
data=pd.read_csv("../input/mushroom-classification/mushrooms.csv")

In [None]:
# Number of rows and columns in the dataset.
data.shape

In [None]:
# Preview the first rows of the raw data.
data.head()

In [None]:
# Names of all columns in the dataset.
list(data.columns)

In [None]:
# Total number of columns.
len(data.columns)

In [None]:
# Summarize column dtypes and non-null counts.
# Per the original note, every column in this dataset has dtype object.
data.info()

Here we list the unique letter codes found in each column, so that we can tell which codes belong to which column and interpret their meaning.

In [None]:
# Show, for every column, the distinct values that occur in it.
for column_name in data.columns:
  distinct_values = data[column_name].unique()
  print(column_name, distinct_values)

Now we will convert the single-letter category codes in the table into numeric values.

In [None]:
from sklearn.preprocessing import LabelEncoder
object_1=LabelEncoder()
# Replace each column, in place, with integer codes for its categories.
# The same encoder object is re-fitted on every column, so afterwards it
# only remembers the mapping of the last column it processed.
for column_name in data.columns:
    encoded_column = object_1.fit_transform(data[column_name])
    data[column_name] = encoded_column

In [None]:
# Preview the data again after label encoding; the values are now integer codes.
data.head()

In [None]:
# Group the rows by the encoded target class.
grouped_data = data.groupby('class')
# Printing the GroupBy object itself only shows its repr (type and memory
# address), so print the number of rows in each class instead.
print(grouped_data.size())

In [None]:
# Histogram of the encoded target column, to see how the samples are spread over the classes.
plt.hist(data["class"])
plt.show() 

In [None]:
# A count plot shows the class balance more clearly than the histogram.
# The column is passed by keyword (x=..., data=...): passing the Series
# positionally is deprecated in seaborn 0.11 and is interpreted as `data`
# (not `x`) in later releases.
sns.countplot(x="class", data=data)
plt.title("Countplot for class")
plt.show()

In [None]:
# Compute the pairwise correlation coefficients between all columns.
data.corr()

In [None]:
# Draw the correlation matrix as an annotated heat map on a 20x20 figure.
fig, ax = plt.subplots(figsize=(20,20))
sns.heatmap(data.corr(), annot=True, linewidths=.5, ax=ax)
plt.show()

In [None]:
# Remove the 'veil-type' column in place; per the original note it has no
# connection with the rest of the data.
# NOTE(review): presumably the column is constant, so its correlations are
# undefined — confirm against the unique-value listing and the heat map.
data.drop(['veil-type'], axis=1, inplace=True)

In [None]:
# Redraw the correlation heat map after dropping 'veil-type', on a 15x15 figure.
fig, ax = plt.subplots(figsize=(15,15))
sns.heatmap(data.corr(), annot=True, linewidths=.5, ax=ax)
plt.show()

In [None]:
# Select the 5 features most strongly correlated with the target "class".
# Ranking uses the absolute correlation so that strong negative relationships
# count too, and "class" itself (correlation 1.0 with itself) is excluded so
# it does not take up one of the five slots.
corr_1=data.corr()
top_features = corr_1["class"].drop("class").abs().nlargest(5).index
# Keep the full correlation rows of the selected features, so most_eff is
# still a DataFrame with one row per selected feature.
most_eff=corr_1.loc[top_features]
most_eff

In [None]:
# Plot the selected correlation rows as a line plot (the DataFrame is passed as wide-form data).
sns.lineplot(data=most_eff)

In [None]:
# Line plot of the encoded 'bruises' feature against the encoded 'class' target.
sns.lineplot(data=data, x="class", y="bruises")

In [None]:
# Use a white background with grid lines for the plots.
sns.set(style="whitegrid")

# Strip plot of the encoded 'gill-size' feature for each target class.
ax = sns.stripplot(data=data, x='class', y='gill-size')

# Add the title and render the figure.
plt.title('Graph')
plt.show()

Now we will start splitting the data.
First, we separate the target column (`class`) from the rest of the columns (the features).

In [None]:
# Features: every column except the target.
feature = data.drop(columns=["class"])
# Target: the encoded 'class' column as a plain array.
target = data["class"].values

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as a test set; random_state is fixed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(feature,target, test_size=0.2, random_state=11)

In [None]:
# Sanity-check the shapes of the train/test features and labels.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

In [None]:
# Now we are going to use a neural network for classification
# Here we will call the libraries that we need.
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential # empty neural network
from keras.layers import Dense # layer constitution


In [None]:
def build_classifier():
    """Build and compile the binary-classification network.

    The network has two hidden ReLU layers of 8 units each and a single
    sigmoid output unit. The input width is taken from the notebook-level
    ``x_train`` (its number of columns).

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, binary
        cross-entropy loss, accuracy metric).
    """
    n_features = x_train.shape[1]
    layers = (
        # First hidden layer; also declares the input size.
        Dense(units=8, kernel_initializer="uniform", activation="relu", input_dim=n_features),
        # Second hidden layer.
        Dense(units=8, kernel_initializer="uniform", activation="relu"),
        # Output layer: one sigmoid unit for the binary target.
        Dense(units=1, kernel_initializer="uniform", activation="sigmoid"),
    )

    model = Sequential()  # start from an empty network
    for layer in layers:
        model.add(layer)

    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

# Wrap the Keras model builder so it can be used as a scikit-learn estimator.
classifier = KerasClassifier(build_fn = build_classifier, epochs=70, batch_size=10)
# epochs = number of passes over the training data; batch_size = number of
# training examples used in one iteration (one weight update).
# Evaluate with 2-fold cross-validation on the training split.
accurisies = cross_val_score(estimator=classifier, X=x_train, y = y_train, cv = 2)
mean = accurisies.mean()
# Use .var() so the value matches its name and its printed label
# (.std() would give the standard deviation, not the variance).
variance = accurisies.var()
print("Accuracy mean : ", str(mean))
print("Accuracy variance : ", str(variance))

In [None]:
# Train on the training split and monitor performance on the held-out test
# split after every epoch. Fitting on x_test/y_test would train the model on
# the data meant for evaluation and leave the training split unused here.
history = classifier.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=70, batch_size=10, verbose=1)

In [None]:
# Accuracy: per-epoch training and validation accuracy recorded in `history`.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy vs Epoch')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

In [None]:
# Loss: per-epoch training and validation loss recorded in `history`.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss vs Epoch')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

# Thank you for your time and thank you for voting.