# Glass Classification

## Import libraries and the data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Load the glass data and give the columns short names.
df = pd.read_csv("glass.csv")
df.columns = ['ID', 'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type']
# Drop the ID column so it is not carried along with the other columns.
df.drop('ID', inplace=True, axis=1)
print(str(df.head()) + '\n')
print(str(df.describe()) + '\n')
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would add a stray "None" line to the output.
df.info()

## Visualize the data 

In [None]:
# Draw the pairwise column correlations as an annotated heatmap.
plt.figure(figsize=(10, 10))
correlations = df.corr()
heatmap = sns.heatmap(
    data=correlations,
    vmin=-1,
    vmax=1,
    cmap="YlGnBu",
    annot=True,
)
heatmap.set_title("Glass Correlation Heatmap", fontsize=10)

In [None]:
# One histogram per column, laid out on a 5x2 grid (filled row by row).
figure, axis = plt.subplots(5, 2, figsize=(10, 20))
hist_columns = ['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type']
for column, ax in zip(hist_columns, axis.flat):
    sns.histplot(x=df[column], ax=ax)
# tight_layout() computes the spacing once, from what is on the axes at the
# time of the call, so it has to run after the plots (and their labels) exist.
figure.tight_layout(pad=10, w_pad=10, h_pad=10)

In [None]:
# Scatter each feature column against the glass Type on a 3x3 grid
# (filled row by row).
figure, axis = plt.subplots(3, 3, figsize=(15, 15))
feature_columns = ['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe']
for column, ax in zip(feature_columns, axis.flat):
    sns.scatterplot(x=column, y='Type', data=df, ax=ax)
# tight_layout() computes the spacing once, from what is on the axes at the
# time of the call, so it has to run after the plots (and their labels) exist.
figure.tight_layout(pad=10, w_pad=10, h_pad=10)

## Prepare the data

In [None]:
# Separate the label column (y) from the remaining feature columns (X).
target_column = "Type"
X = df.drop(columns=target_column)
y = df[target_column]
print(y.head())
print(X.head())

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Rescale the features with MinMaxScaler; X is replaced by the transformed
# array returned by the scaler.
# NOTE(review): the scaler is fitted on all rows here, before the train/test
# split is made further down — confirm that is intended.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
print(X)

## Initial Testing 

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Baseline: fit each classifier with its default settings and report how it
# does on the held-out rows. The random forest is seeded as well, otherwise
# its reported accuracy changes from run to run even though the split is fixed.
for model in [KNeighborsClassifier(), RandomForestClassifier(random_state=42), SVC()]:
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    acc = accuracy_score(y_test, pred)
    print(str(model))
    print("Accuracy: {}".format(acc))
    print("Confusion Matrix:  \n {} \n".format(confusion_matrix(y_test, pred)))

## Hyperparameter Tuning 

In [None]:
from sklearn.model_selection import GridSearchCV
# Search grids for GridSearchCV, keyed by a short model name. Each inner dict
# maps an estimator parameter name to the list of values to try.
hyperParams = {
    'svm': {
        'C': [1, 10, 100],
        'kernel': ['rbf', 'linear', 'poly'],
        'degree': [2, 3, 4],
        'gamma': ['scale', 'auto'],
    },
    'randomForest': {
        'n_estimators': [10, 100, 200, 400, 800],
        'min_samples_split': [2, 4, 8],
        'min_samples_leaf': [1, 2, 4],
        # 'auto' is left out: for classifiers it was an alias of 'sqrt', and
        # it is deprecated/removed in newer scikit-learn releases, so keeping
        # it only produced duplicate (or failing) fits.
        'max_features': ['sqrt', 'log2'],
    },
    'knn': {
        'n_neighbors': [3, 5, 7, 10],
        'weights': ['uniform', 'distance'],
        'algorithm': ['ball_tree', 'kd_tree', 'brute', 'auto'],
        'leaf_size': [20, 30, 40],
    },
}

In [None]:
# Grid-search the SVM with 5-fold cross-validation.
# The search is run on the training split only: the held-out rows are used
# later to score the tuned models, so they must not help choose the
# hyperparameters. verbose=0 is the silent setting; a negative verbosity is
# not a meaningful value and is rejected by recent scikit-learn validation.
gridSVM = GridSearchCV(SVC(), hyperParams['svm'], cv=5, verbose=0, n_jobs=-1)
gridSVM.fit(X_train, y_train)
gridSVMResults = pd.DataFrame(gridSVM.cv_results_)
print(gridSVMResults.head())
print(gridSVM.best_params_)
bestSVM = gridSVM.best_estimator_

In [None]:
# Grid-search the random forest with 5-fold cross-validation.
# The search is run on the training split only: the held-out rows are used
# later to score the tuned models, so they must not help choose the
# hyperparameters. The forest is seeded so the search result is repeatable.
# verbose=0 is the silent setting; a negative verbosity is not a meaningful
# value and is rejected by recent scikit-learn validation.
gridRF = GridSearchCV(
    RandomForestClassifier(random_state=42),
    hyperParams['randomForest'],
    cv=5,
    verbose=0,
    n_jobs=-1,
)
gridRF.fit(X_train, y_train)
gridRFResults = pd.DataFrame(gridRF.cv_results_)
print(gridRFResults.head())
print(gridRF.best_params_)
bestRF = gridRF.best_estimator_

In [None]:
# Grid-search k-nearest-neighbours with 5-fold cross-validation.
# The search is run on the training split only: the held-out rows are used
# later to score the tuned models, so they must not help choose the
# hyperparameters. verbose=0 is the silent setting; a negative verbosity is
# not a meaningful value and is rejected by recent scikit-learn validation.
gridKNN = GridSearchCV(
    KNeighborsClassifier(), hyperParams['knn'], cv=5, verbose=0, n_jobs=-1
)
gridKNN.fit(X_train, y_train)
gridKNNResults = pd.DataFrame(gridKNN.cv_results_)
print(gridKNNResults.head())
print(gridKNN.best_params_)
bestKNN = gridKNN.best_estimator_

## Testing out the best models

In [None]:
# Refit each tuned estimator on the training split and report its accuracy
# and confusion matrix on the held-out rows.
for model in [bestKNN, bestRF, bestSVM]:
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    acc = accuracy_score(y_test, pred)
    print(str(model))
    print("Accuracy: {}".format(acc))
    # Label spelling corrected ("Confusion"); spacing of the output is unchanged.
    print("Confusion Matrix:  \n {} \n".format(confusion_matrix(y_test, pred)))