## Glass Type classification 
Hey there, welcome to my first notebook. I hope you find it useful.
In this notebook, I am going to use the "Decision Tree", "K-Nearest Neighbors" and "SVM" algorithms.

## Prepare problem

In [None]:
#importing libraries
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn import metrics
import matplotlib.pyplot as plt

In [None]:
# Read the glass dataset (CSV under the Kaggle input directory) into a DataFrame
data = pd.read_csv(filepath_or_buffer='../input/glass/glass.csv')

In [None]:
# shape of the data as (number of rows, number of columns)
data.shape

In [None]:
# preview the first five rows of the data
data.head()

In [None]:
# count the missing (null) values in each column of the data
data.isnull().sum()

In [None]:
# checking the data type of each column
data.dtypes

## Descriptive statistics

In [None]:
# summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
data.describe()

In [None]:
# number of samples per glass Type, most frequent class first
data['Type'].value_counts()

In [None]:
# data visualization
# Count the samples per glass type once and sort by class label, so that every
# bar is drawn with the count that belongs to its own class.  unique() lists
# classes in order of appearance while value_counts() orders them by frequency,
# so taking labels from one and heights from the other mislabels the bars.
values = data['Type'].value_counts().sort_index()
glass_classes = values.index

# String labels make the x-axis categorical: exactly one tick per glass type.
plt.bar(glass_classes.astype(str), values.values)
plt.title('Types_of_Glass')
plt.xlabel('Glass Classes')
plt.ylabel('Examples count')
plt.show()

In [None]:
# Split the frame into the target and the feature matrix.

# y: the dependent variable, i.e. the 'Type' column
y = data['Type']

# X: the independent variables, i.e. every column except 'Type'
X = data.drop(columns=['Type'])

In [None]:
# shape of independent and dependent variables; both must have the same number of rows
X.shape, y.shape

In [None]:
# Import sklearn and train_test_split to carve out a held-out test split
import sklearn
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; stratify on y so both splits keep the
# class proportions, and fix random_state so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=70,
)

In [None]:
# class distribution in the training set, as fractions of the training rows
y_train.value_counts(normalize=True)

In [None]:
# class distribution in the testing set, as fractions of the testing rows
# (should be close to the training distribution because the split is stratified)
y_test.value_counts(normalize=True)

In [None]:
# shape of the training features and training target
X_train.shape, y_train.shape

In [None]:
# shape of the testing features and testing target
X_test.shape,y_test.shape

## Decision Tree 

In [None]:
#importing decision tree classifier 
from sklearn.tree import DecisionTreeClassifier

In [None]:
# create the decision tree with default hyper-parameters (fitting happens in the next cell);
# random_state is fixed so the fitted tree is reproducible
dt_model = DecisionTreeClassifier(random_state=10)

In [None]:
# fit the decision tree on the training split
dt_model.fit(X_train, y_train)

In [None]:
# mean accuracy on the training split
dt_model.score(X_train, y_train)

In [None]:
# mean accuracy on the held-out split (X_test / y_test), used here as the validation score
dt_model.score(X_test, y_test)

In [None]:
# predicted glass types for the held-out split
dt_predict=dt_model.predict(X_test)

In [None]:
# display the predicted labels
dt_predict

In [None]:
# accuracy of the stored predictions against the true held-out labels
# (accuracy_score is also imported in the first code cell; this import is redundant)
from sklearn.metrics import accuracy_score
accuracy_score(y_test,dt_predict)

# Finding optimum max_depth

In [None]:
# Sweep max_depth from 1 to 14 and record the accuracy on both splits.
# NOTE(review): the "validation" scores are measured on X_test / y_test, i.e. the
# same split that is later used to report final performance, so the depth chosen
# from this sweep is tuned on the test data.  Consider cross-validating on
# X_train instead.
train_accuracy, validation_accuracy = [], []
for depth in range(1, 15):
    dt_model = DecisionTreeClassifier(random_state=6, max_depth=depth).fit(X_train, y_train)
    train_accuracy += [dt_model.score(X_train, y_train)]
    validation_accuracy += [dt_model.score(X_test, y_test)]

In [None]:
# Tabulate the sweep: one row per depth with the accuracy on each split
frame = pd.DataFrame(
    {
        'max_depth': range(1, 15),
        'train_acc': train_accuracy,
        'test_acc': validation_accuracy,
    }
)
frame.head(5)

In [None]:
# Accuracy versus tree depth, one curve per split
plt.figure(figsize=(14, 6))
for column in ('train_acc', 'test_acc'):
    plt.plot(frame['max_depth'], frame[column], marker='o')
plt.xlabel('Depth of tree')
plt.ylabel('performance')
plt.legend(['train_acc', 'test_acc'])

In [None]:
# Per the depth sweep, accuracy keeps increasing until max_depth is 9 and stays
# constant after that, so the final tree is capped at depth 9.
# NOTE(review): max_leaf_nodes=35 is not derived from the sweep - confirm how it was chosen.
dt_model = DecisionTreeClassifier(max_depth=9, max_leaf_nodes=35, random_state=10)

In [None]:
# fitting the tuned decision tree on the training split
dt_model.fit(X_train, y_train)

In [None]:
# Training score: mean accuracy of the tuned tree on the training split
dt_model.score(X_train, y_train)

In [None]:
# Validation score: mean accuracy of the tuned tree on the held-out split (X_test / y_test)
dt_model.score(X_test, y_test)

In [None]:
# predictions of the tuned tree on the held-out split
dt_predict1=dt_model.predict(X_test)

In [None]:
# display the predicted labels of the tuned tree
dt_predict1

In [None]:
# accuracy of the tuned tree's predictions against the true held-out labels
accuracy_score(y_test,dt_predict1)

In [None]:
# Get the confusion Matrix of the Model
# (rows are the actual labels, columns the predicted labels, on the held-out split)
cnf_matrix = metrics.confusion_matrix(y_test,dt_predict1)
cnf_matrix

In [None]:
# Plot the Confusion Matrix as a HeatMap
import seaborn as sns

# confusion_matrix orders its rows/columns by the sorted labels that occur in
# the true or the predicted values; rebuild that ordering instead of hard-coding it.
class_names = sorted(set(y_test) | set(dt_predict1))  # Name of classes

# Label the matrix itself: seaborn takes its tick labels from the DataFrame's
# index/columns, so ticks set on the axes before plotting would be overwritten
# with the positions 0..n-1.
cnf_frame = pd.DataFrame(cnf_matrix, index=class_names, columns=class_names)

fig, ax = plt.subplots()
# create heatmap on the explicit axes
sns.heatmap(cnf_frame, annot=True, cmap="YlGnBu", fmt='g', ax=ax)
ax.xaxis.set_label_position("top")
ax.set_title('Confusion matrix', y=1.1)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
# Lay out after the title and labels exist so they are taken into account.
fig.tight_layout();

In [None]:
# Comprehensive report (precision / recall / F1 per class) of the DT model.
# It is computed on the held-out test split only: scoring the full dataset would
# mix in the rows the tree was trained on and overstate its performance.
print(metrics.classification_report(y_test, dt_model.predict(X_test), zero_division=1))

In [None]:
from sklearn import tree

In [None]:
# Draw the fitted tree with readable labels: without feature/class names the
# nodes only show positional placeholders such as x[3].
fig, ax = plt.subplots(figsize=(20, 10))
tree.plot_tree(
    dt_model,
    feature_names=list(X.columns),
    # class_names must be strings, in the ascending order used by the model
    class_names=[str(label) for label in dt_model.classes_],
    filled=True,
    ax=ax,
);

# K-Nearest Neighbour

In [None]:
# importing the KNN classifier under the short alias KNN
from sklearn.neighbors import KNeighborsClassifier as KNN

In [None]:
# KNN classifies by distance, so the features are standardised first; otherwise
# columns with large numeric ranges dominate the distance computation.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# n_neighbors = 5 was chosen by the author so that k is not a multiple of the
# number of features.
clf = make_pipeline(StandardScaler(), KNN(n_neighbors=5))

# Fitting the model (the scaler learns its statistics from the training split only)
clf.fit(X_train, y_train)

In [None]:
# predicted glass types of the KNN model for the held-out split
predict_type_n = clf.predict(X_test)

In [None]:
# accuracy of the KNN predictions against the true held-out labels
# (accuracy_score is also imported in the first code cell; this import is redundant)
from sklearn.metrics import accuracy_score
accuracy_score(y_test,predict_type_n)

# finding optimum k value

In [None]:
# Sweep k from 1 to 14 and record the accuracy on both splits.
# Each candidate is a StandardScaler + KNN pipeline: KNN is distance-based, so
# unscaled features would let the largest-valued columns dominate.
# NOTE(review): the "validation" scores are measured on X_test / y_test, the same
# split used for the final report - consider cross-validating on X_train instead.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

train_accuracy = []
validation_accuracy = []
for i in range(1,15):
    clf = make_pipeline(StandardScaler(), KNN(n_neighbors = i))
    clf.fit(X_train, y_train)
    train_accuracy.append(clf.score(X_train, y_train))
    validation_accuracy.append(clf.score(X_test, y_test))

In [None]:
# Tabulate the sweep: one row per k with the accuracy on each split
frame = pd.DataFrame(
    {
        'n_neighbors': range(1, 15),
        'train_acc': train_accuracy,
        'valid_acc': validation_accuracy,
    }
)
frame.head(n=10)

In [None]:
# Accuracy versus number of neighbours, one curve per split
plt.figure(figsize=(14, 6))
for column in ('train_acc', 'valid_acc'):
    plt.plot(frame['n_neighbors'], frame[column], marker='o')
plt.xlabel('Value of K')
plt.ylabel('performance')
plt.legend(['train_acc', 'valid_acc'])

In [None]:
# Final KNN model with k = 8.  Features are standardised inside the pipeline
# because KNN is distance-based and unscaled columns would dominate the metric.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(StandardScaler(), KNN(n_neighbors=8))
clf.fit(X_train, y_train)
#Training score
clf.score(X_train, y_train)

In [None]:
# predictions of the k = 8 model on the held-out split, displayed as the cell output
predict_type_n_2 = clf.predict(X_test)
predict_type_n_2

In [None]:
# accuracy of the k = 8 model's predictions against the true held-out labels
accuracy_score(y_test,predict_type_n_2)

In [None]:
# Get the confusion Matrix of the Model
# (rows are the actual labels, columns the predicted labels, on the held-out split)
cnf_matrix_1 = metrics.confusion_matrix(y_test,predict_type_n_2)
cnf_matrix_1

In [None]:
# Plot the Confusion Matrix as a HeatMap
# confusion_matrix orders its rows/columns by the sorted labels that occur in
# the true or the predicted values; rebuild that ordering instead of hard-coding it.
class_names = sorted(set(y_test) | set(predict_type_n_2))  # Name of classes

# Label the matrix itself: seaborn takes its tick labels from the DataFrame's
# index/columns, so ticks set on the axes before plotting would be overwritten
# with the positions 0..n-1.
cnf_frame_1 = pd.DataFrame(cnf_matrix_1, index=class_names, columns=class_names)

fig, ax = plt.subplots()
# create heatmap on the explicit axes
sns.heatmap(cnf_frame_1, annot=True, cmap="YlGnBu", fmt='g', ax=ax)
ax.xaxis.set_label_position("top")
ax.set_title('Confusion matrix', y=1.1)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
# Lay out after the title and labels exist so they are taken into account.
fig.tight_layout();

In [None]:
# Comprehensive report (precision / recall / F1 per class) of the KNN model.
# It is computed on the held-out test split only: scoring the full dataset would
# mix in the rows the model was fitted on and overstate its performance.
print(metrics.classification_report(y_test, clf.predict(X_test), zero_division=1))

# SVM

In [None]:
from sklearn.svm import SVC


In [None]:
# Linear SVM wrapped in a pipeline that standardises the features first: SVMs
# are sensitive to feature scale, and unscaled inputs make the linear kernel
# slow to converge and biased towards large-valued columns.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf_1 = make_pipeline(StandardScaler(), SVC(kernel='linear'))

In [None]:
# fit the SVM on the training split
clf_1.fit(X_train, y_train)

In [None]:
# show the hyper-parameters the model was configured with
clf_1.get_params()

In [None]:
# predicted glass types of the SVM for the held-out split
y_pred = clf_1.predict(X_test)

In [None]:
# Model Accuracy: how often is the classifier correct?
# (fraction of held-out samples whose predicted type equals the true type)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [None]:
# Get the confusion Matrix of the Model
# (rows are the actual labels, columns the predicted labels, on the held-out split)
# NOTE: this reuses the name cnf_matrix and so replaces the decision-tree matrix.
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
cnf_matrix

In [None]:
# Plot the Confusion Matrix as a HeatMap
import seaborn as sns

# confusion_matrix orders its rows/columns by the sorted labels that occur in
# the true or the predicted values; rebuild that ordering instead of hard-coding it.
class_names = sorted(set(y_test) | set(y_pred))  # Name of classes

# Label the matrix itself: seaborn takes its tick labels from the DataFrame's
# index/columns, so ticks set on the axes before plotting would be overwritten
# with the positions 0..n-1.
cnf_frame = pd.DataFrame(cnf_matrix, index=class_names, columns=class_names)

fig, ax = plt.subplots()
# create heatmap on the explicit axes
sns.heatmap(cnf_frame, annot=True, cmap="YlGnBu", fmt='g', ax=ax)
ax.xaxis.set_label_position("top")
ax.set_title('Confusion matrix', y=1.1)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
# Lay out after the title and labels exist so they are taken into account.
fig.tight_layout();

In [None]:
# Comprehensive report (precision / recall / F1 per class) of the SVC model.
# It is computed on the held-out test split only: scoring the full dataset would
# mix in the rows the model was fitted on and overstate its performance.
print(metrics.classification_report(y_test, clf_1.predict(X_test), zero_division=1))

## THANK YOU.....