### Importing the required libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

### Loading the dataset

In [2]:
# Read the mushroom dataset from a CSV file in the current working directory.
data = pd.read_csv(filepath_or_buffer='mushrooms.csv')
# Final expression of the cell: displays the frame's (rows, columns) tuple.
data.shape

(8124, 23)

In [3]:
# Preview the first five rows of the raw (still letter-coded) data.
data.head(n=5)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [4]:
# Create a scikit-learn LabelEncoder, used to turn categorical codes into integers.
label_encode = LabelEncoder()

In [5]:
# Encode every column's categorical codes as integers.
# A dedicated LabelEncoder is fitted per column and kept in `encoders`:
# refitting one shared encoder on each column would leave it holding only the
# classes of the last column, so the integer codes (including the target's)
# could no longer be mapped back to the original letters.
# Example: encoders['class'].inverse_transform([0, 1]) recovers the class letters.
encoders = {column: LabelEncoder().fit(data[column]) for column in data.columns}
data = data.apply(lambda values: encoders[values.name].transform(values))

In [6]:
# Preview the first five rows again to inspect the integer-encoded values.
data.head(n=5)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1,5,2,4,1,6,1,0,1,4,...,2,7,7,0,2,1,4,2,3,5
1,0,5,2,9,1,0,1,0,0,4,...,2,7,7,0,2,1,4,3,2,1
2,0,0,2,8,1,3,1,0,0,5,...,2,7,7,0,2,1,4,3,2,3
3,1,5,3,8,1,6,1,0,1,5,...,2,7,7,0,2,1,4,2,3,5
4,0,5,2,3,0,5,1,1,0,4,...,2,7,7,0,2,1,0,3,0,1


### Splitting the dataset into features and target set

In [7]:
# The first column is the target; every remaining column is a feature.
y = data.iloc[:, 0]
X = data.iloc[:, 1:]
# Report both shapes as a quick sanity check that the row counts agree.
print(
    "Shape of features set is %s and shape of target set is %s"
    % (X.shape, y.shape)
)

Shape of features set is (8124, 22) and shape of target set is (8124,)


In [8]:
# Show the container types of the feature set and the target set.
print(*map(type, (X, y)))

<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>


In [9]:
# Convert the pandas objects into NumPy arrays.
# np.asarray is used instead of the `.values` attribute so this cell can be
# re-run safely: once X and y are already ndarrays, `.values` would raise
# AttributeError, whereas np.asarray simply returns them unchanged.
X = np.asarray(X)
y = np.asarray(y)
# Confirm the new container types and that the shapes are unchanged.
print(type(X),type(y))
print("Shape of features set is %s and shape of target set is %s"%(X.shape,y.shape))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
Shape of features set is (8124, 22) and shape of target set is (8124,)


In [10]:
# List the distinct values present in the target array.
np.unique(y)

array([0, 1])

### Splitting the data for Training and validation

In [11]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# identical from run to run.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Building the model using the Gradient Boosting algorithm

In [12]:
# Gradient boosting classifier with default hyperparameters.
# random_state is pinned (same seed as the train/validation split) so that
# repeated runs build identical trees and the reported scores are reproducible.
gbc = GradientBoostingClassifier(random_state=42)

In [13]:
# Train the classifier on the training split; `model` is whatever fit() returns
# and is used below for scoring and prediction.
model = gbc.fit(X=X_train, y=Y_train)

In [14]:
# Compare accuracy on the data the model was fitted on with accuracy on the
# held-out validation rows.
print(
    "The accuracy score of model and train set is: ",
    model.score(X_train, Y_train),
)
print(
    "The accuracy score of model and validation set is: ",
    model.score(X_val, Y_val),
)

The accuracy score of model and train set is:  1.0
The accuracy score of model and validation set is:  1.0


In [15]:
# Predict a class label for every row of the validation features.
predicted_X_val = model.predict(X=X_val)

In [16]:
# Print the predicted validation labels (NumPy abbreviates long arrays with "...").
print(predicted_X_val)

[0 1 1 ... 1 1 1]
