# Glass Identification Database - A Multi class classification

Attribute Information:<br/>
   1. Id number: 1 to 214
   2. RI: refractive index
   3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as are attributes 4-10)
   4. Mg: Magnesium
   5. Al: Aluminum
   6. Si: Silicon
   7. K: Potassium
   8. Ca: Calcium
   9. Ba: Barium
  10. Fe: Iron
  11. Type of glass: (class attribute)<br/>
      -- 1 building_windows_float_processed<br/>
      -- 2 building_windows_non_float_processed<br/>
      -- 3 vehicle_windows_float_processed<br/>
      -- 4 vehicle_windows_non_float_processed (none in this database)<br/>
      -- 5 containers<br/>
      -- 6 tableware<br/>
      -- 7 headlamps<br/>

In [1]:
#importing packages
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

In [2]:
import pandas as pd

In [3]:
#reading data into pandas dataframe

header_names = ['id', 'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type']
glass_data = pd.read_csv('glassdata.csv', header=None, names=header_names)

In [4]:
glass_data.head(5)

Unnamed: 0,id,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [5]:
x, y  = glass_data[['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe']], glass_data['Type']

In [6]:
train_x, test_x, train_y, test_y = train_test_split(x, y, random_state = 5, test_size=0.2)

##  Using Gaussian Naive Bayes Algorithm 

In [7]:
gaussNBmodel = GaussianNB()

In [8]:
gaussNBmodel.fit(train_x, train_y)

GaussianNB(priors=None)

In [9]:
gausspred = gaussNBmodel.predict(test_x)

In [10]:
accuracy = gaussNBmodel.score(test_x, test_y)

print("The Accuracy for Gaussian model is {}".format(accuracy*100))

The Accuracy for Gaussian model is 27.906976744186046


In [11]:
conf_mat = confusion_matrix(test_y, gausspred)

print(conf_mat)

[[ 0  2 14  0  0  0]
 [ 1  4  9  0  0  1]
 [ 0  1  1  0  0  0]
 [ 0  0  0  1  0  0]
 [ 0  0  0  0  1  1]
 [ 0  1  0  1  0  5]]


## Using Decision Tree Classifier

In [12]:
dtreemodel = DecisionTreeClassifier(max_depth=2)

In [13]:
dtreemodel.fit(train_x, train_y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [14]:
dtreepred = dtreemodel.predict(test_x)

In [15]:
conf_mat = confusion_matrix(test_y, dtreepred)

In [16]:
print("-"*40)
print("Decision Tree Confusion Matrix : \n")
print(conf_mat)
print("-"*40)

----------------------------------------
Decision Tree Confusion Matrix : 

[[14  1  0  0  0  1]
 [ 5 10  0  0  0  0]
 [ 2  0  0  0  0  0]
 [ 0  1  0  0  0  0]
 [ 2  0  0  0  0  0]
 [ 0  1  0  0  0  6]]
----------------------------------------


In [17]:
accuracy = accuracy_score(test_y, dtreepred)

print("The Accuracy for Decision Tree Model is {}".format(accuracy*100))

The Accuracy for Decision Tree Model is 69.76744186046511


## Using Random Forest

In [18]:
rforestclass = RandomForestClassifier(max_depth=7)

In [19]:
rforestclass.fit(train_x, train_y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=7, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [20]:
rforestpred = rforestclass.predict(test_x)

In [21]:
conf_mat = confusion_matrix(test_y, rforestpred)

print("-"*40)
print("Random Forest Confusion Matrix : \n")
print(conf_mat)
print("-"*40)

----------------------------------------
Random Forest Confusion Matrix : 

[[15  1  0  0  0  0]
 [ 2 12  0  1  0  0]
 [ 1  1  0  0  0  0]
 [ 0  0  0  1  0  0]
 [ 0  0  0  0  2  0]
 [ 0  1  0  1  0  5]]
----------------------------------------


In [22]:
accuracy = accuracy_score(test_y, rforestpred)

print("The Accuracy for Random Forest Model is {}".format(accuracy*100))

The Accuracy for Random Forest Model is 81.3953488372093


## Using KNN

In [23]:
kneighmodel = KNeighborsClassifier(n_neighbors=10)

In [24]:
kneighmodel.fit(train_x, train_y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=10, p=2,
           weights='uniform')

In [25]:
knnprediction = kneighmodel.predict(test_x)

In [26]:
conf_mat = confusion_matrix(test_y, knnprediction)

print("-"*40)
print("K Neares NeighbourConfusion Matrix : \n")
print(conf_mat)
print("-"*40)

----------------------------------------
K Neares NeighbourConfusion Matrix : 

[[13  1  2  0  0  0]
 [ 3 11  0  1  0  0]
 [ 2  0  0  0  0  0]
 [ 0  0  0  0  0  1]
 [ 1  0  0  0  0  1]
 [ 0  2  0  0  0  5]]
----------------------------------------


In [27]:
accuracy = accuracy_score(test_y, knnprediction)

print("The Accuracy for KNN Model is {}".format(accuracy*100))

The Accuracy for KNN Model is 67.44186046511628
