In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the mushroom CSV (path is relative to the notebook's working directory).
data = pd.read_csv('../input/mushrooms.csv')

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Print the frame summary produced by DataFrame.info().
data.info()

In [None]:
# Number of distinct values per column, shown as a one-column table.
# dropna=False: a missing value counts as its own distinct value, so a column
# that is constant apart from missing entries is not reported as single-valued.
data.nunique(dropna=False).to_frame(name='nunique')

In [None]:
# Remove the 'veil-type' column (presumably because the nunique table shows it
# carries a single value -- confirm against that output).
# Rebinding `data` with errors='ignore' keeps the cell re-runnable: a second
# execution is a no-op instead of raising KeyError on the missing column.
data = data.drop(['veil-type'], axis=1, errors='ignore')

In [None]:
from sklearn.preprocessing import LabelEncoder

# One fitted encoder is kept per column so the integer codes can later be
# mapped back to the original category values, e.g.
# encoders['odor'].inverse_transform(...). A single shared encoder would be
# refit on every column and only remember the last one.
encoders = {}

for col in data.columns:
    # `encoder` stays bound after the loop (to the last column's encoder).
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])
    encoders[col] = encoder

# Show the frame after every column (including the target) has been encoded.
data.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Render Matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's 'whitegrid' style to the plots that follow.
sns.set_style('whitegrid')

In [None]:
# Pairwise correlation between all columns of `data`, drawn as an annotated
# heatmap on a large canvas so the per-cell numbers remain readable.
corr_matrix = data.corr()
plt.figure(figsize=(16, 15))
sns.heatmap(corr_matrix, annot=True)

In [None]:
# Class balance: number of rows for each value of the target column.
sns.countplot(
    data=data,
    x='class',
    palette='Set1',
)

In [None]:
# Three side-by-side views of 'cap-shape'. Each plot is drawn on the Axes
# returned by plt.subplots (via ax=...) instead of re-selecting a panel through
# the pyplot state machine, so `ax` always refers to the Axes actually shown.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 7))

# Panel 1: distribution plot of the 'cap-shape' column.
sns.distplot(data['cap-shape'], ax=ax[0])

# Panel 2: row count per 'cap-shape' value.
sns.countplot(x='cap-shape', data=data, palette='rainbow', ax=ax[1])

# Panel 3: the same counts, split by target class.
sns.countplot(x='cap-shape', data=data, hue='class', palette='rainbow', ax=ax[2])

In [None]:
# Two views of 'cap-surface', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'cap-surface' value.
sns.countplot(x='cap-surface', data=data, palette='rainbow', ax=ax[0])

# Right: row count per class, split by 'cap-surface'.
sns.countplot(x='class', data=data, hue='cap-surface', palette='rainbow', ax=ax[1])

In [None]:
# Two views of 'cap-color', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'cap-color' value.
sns.countplot(x='cap-color', data=data, palette='rainbow', ax=ax[0])

# Right: row count per class, split by 'cap-color'.
sns.countplot(x='class', data=data, hue='cap-color', palette='rainbow', ax=ax[1])

In [None]:
# Two views of 'bruises', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'bruises' value.
sns.countplot(x='bruises', data=data, palette='coolwarm', ax=ax[0])

# Right: row count per class, split by 'bruises'.
sns.countplot(x='class', data=data, hue='bruises', palette='coolwarm', ax=ax[1])

In [None]:
# Two views of 'odor', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'odor' value.
sns.countplot(x='odor', data=data, palette='rainbow', ax=ax[0])

# Right: row count per class, split by 'odor'.
sns.countplot(x='class', data=data, hue='odor', palette='rainbow', ax=ax[1])

In [None]:
# Two views of 'gill-attachment', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'gill-attachment' value.
sns.countplot(x='gill-attachment', data=data, palette='coolwarm', ax=ax[0])

# Right: row count per class, split by 'gill-attachment'.
sns.countplot(x='class', data=data, hue='gill-attachment', palette='coolwarm', ax=ax[1])

In [None]:
# Two views of 'gill-spacing', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'gill-spacing' value.
sns.countplot(x='gill-spacing', data=data, palette='coolwarm', ax=ax[0])

# Right: row count per class, split by 'gill-spacing'.
sns.countplot(x='class', data=data, hue='gill-spacing', palette='coolwarm', ax=ax[1])

In [None]:
# Two views of 'gill-size', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'gill-size' value.
sns.countplot(x='gill-size', data=data, palette='coolwarm', ax=ax[0])

# Right: row count per class, split by 'gill-size'.
sns.countplot(x='class', data=data, hue='gill-size', palette='coolwarm', ax=ax[1])

In [None]:
# Two views of 'gill-color', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'gill-color' value.
sns.countplot(x='gill-color', data=data, palette='rainbow', ax=ax[0])

# Right: row count per class, split by 'gill-color'.
sns.countplot(x='class', data=data, hue='gill-color', palette='rainbow', ax=ax[1])

In [None]:
# Two views of 'stalk-shape', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'stalk-shape' value.
sns.countplot(x='stalk-shape', data=data, palette='coolwarm', ax=ax[0])

# Right: row count per class, split by 'stalk-shape'.
sns.countplot(x='class', data=data, hue='stalk-shape', palette='coolwarm', ax=ax[1])

In [None]:
# Two views of 'stalk-root', drawn directly on the Axes returned by
# plt.subplots (ax=...) rather than re-selected through plt.subplot.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))

# Left: row count per 'stalk-root' value.
sns.countplot(x='stalk-root', data=data, palette='rainbow', ax=ax[0])

# Right: row count per class, split by 'stalk-root'.
sns.countplot(x='class', data=data, hue='stalk-root', palette='rainbow', ax=ax[1])

In [None]:
# Feature matrix = every column except the target. The target is selected by
# name rather than by position, so the split stays correct even if 'class' is
# not the first column of the frame.
X = data.drop('class', axis=1)
# Target vector: the 'class' column.
y = data['class']

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Preview the first entries of the target vector.
y.head()

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Cumulative explained-variance ratio for 1..20 principal components.
# A component's explained-variance ratio does not depend on how many components
# are requested, so a single 20-component fit followed by a running sum yields
# the whole curve; there is no need to refit PCA once per component count.
# svd_solver='full' uses the exact decomposition, which keeps the curve
# deterministic from run to run.
pca = PCA(n_components=20, svd_solver='full')
pca.fit(X)
# var[k] is the total variance ratio captured by the first k + 1 components.
var = np.cumsum(pca.explained_variance_ratio_).tolist()

In [None]:
# Plot the summed explained-variance ratio (`var`) against the number of
# components kept (1 through 20).
line_style = dict(color='red', linestyle='dashed', marker='o',
                  markerfacecolor='black', markersize=10)
variance_ax = plt.figure(figsize=(10, 6)).gca()
variance_ax.plot(range(1, 21), var, **line_style)
variance_ax.set_title('Variance vs. Components')
variance_ax.set_xlabel('Components')
variance_ax.set_ylabel('Variance')

In [None]:
# Project the features onto 15 principal components.
# random_state pins the result when PCA's automatic solver choice falls on a
# randomized solver; 101 matches the seed used for the train/test split.
# NOTE(review): the projection is fitted on all rows before the train/test
# split, so it has seen the held-out samples -- consider fitting on the
# training split only.
pca = PCA(n_components=15, random_state=101)
# X is rebound from the raw feature frame to the projected array.
X = pca.fit_transform(X)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test partition using the library's default proportions; the fixed
# seed makes the partition repeatable.
split_parts = train_test_split(X, y, random_state=101)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from tpot import TPOTClassifier

In [None]:
# TPOT pipeline search: 5 generations of 20 candidates, 5-fold CV, all cores,
# progress printed at verbosity level 2.
# random_state seeds the otherwise stochastic search so that re-running the
# notebook explores the same candidates; 101 matches the train/test split seed.
pipeline = TPOTClassifier(
    generations=5,
    population_size=20,
    cv=5,
    n_jobs=-1,
    verbosity=2,
    random_state=101,
)

In [None]:
# Run the TPOT search on the training split.
pipeline.fit(X_train, y_train)

In [None]:
# Score the fitted TPOT object on the held-out test split.
pipeline.score(X_test, y_test)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Predicted labels for the test split, used by the evaluation reports.
y_pred = pipeline.predict(X_test)

In [None]:
# Print the confusion matrix followed by the text classification report,
# both comparing the true test labels with the predictions.
for report_fn in (confusion_matrix, classification_report):
    print(report_fn(y_test, y_pred))