In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory
for folder, _, file_names in os.walk('/kaggle/input'):
    input_paths = [os.path.join(folder, file_name) for file_name in file_names]
    for input_path in input_paths:
        print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Dataset Loading

In [None]:
# Read the glass dataset CSV into a DataFrame
glass_csv_path = "../input/glass/glass.csv"
df = pd.read_csv(glass_csv_path)

In [None]:
# Preview the first rows of the loaded DataFrame
df.head()

# 2. Data Preprocessing

In [None]:
# Check column data types and non-null counts
df.info()

In [None]:
# Count missing (NaN) values in each column
df.isna().sum()

In [None]:
# Overview of the data distribution: summary statistics for each column
df.describe()

# 3. Data Visualization

In [None]:
# Bar chart of how many samples belong to each glass Type
sns.countplot(data=df, x='Type')

In [None]:
# Plot the pairwise relationships between the columns of df
sns.pairplot(df)

In [None]:
# Annotated heatmap of the pairwise correlations between all columns
corr_matrix = df.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm")

From the heatmap above:<br>
Positively correlated columns: Ca and RI<br>
Negatively correlated columns: Mg and Type

In [None]:
# Split the frame into the target column (y) and the remaining feature columns (x)
y = df['Type']
x = df.drop(labels='Type', axis='columns')

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=4,
)

# 4. Classification Models

## 4.1 Logistic Regression

In [None]:
# Standardize the features for the scale-sensitive models below.
from sklearn.preprocessing import StandardScaler
scale=StandardScaler()
# Learn the mean/std from the training split only ...
scale_xtrain=scale.fit_transform(xtrain)
# ... and reuse those statistics on the test split. Calling fit_transform here
# would re-fit the scaler on the test data, so train and test would be
# standardized differently and test information would leak into preprocessing.
scale_xtest=scale.transform(xtest)

In [None]:
# Fit logistic regression on the standardized training features,
# then predict the labels of the standardized test features
from sklearn.linear_model import LogisticRegression
model = LogisticRegression().fit(scale_xtrain, ytrain)
ypred = model.predict(scale_xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
lg_acc=accuracy_score(ytest,ypred)
print("Accuracy Score is:",lg_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.2 KNN  (k-nearest neighbors)

In [None]:
# Fit a k-nearest-neighbours classifier (default settings) on the training split
# and predict the labels of the test split
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier().fit(xtrain, ytrain)
ypred = model.predict(xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
knn_acc=accuracy_score(ytest,ypred)
print("Accuracy Score is:",knn_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.3 SVC Support Vector Classifier

In [None]:
# Fit a linear-kernel support vector classifier on the standardized training
# features and predict the labels of the standardized test features
from sklearn.svm import SVC
model = SVC(kernel="linear").fit(scale_xtrain, ytrain)
ypred = model.predict(scale_xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
svc_acc=accuracy_score(ytest,ypred)
print("Accuracy is:",svc_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.4 Naive Bayes

In [None]:
# Fit a Gaussian naive Bayes classifier on the training split
# and predict the labels of the test split
from sklearn.naive_bayes import GaussianNB
model = GaussianNB().fit(xtrain, ytrain)
ypred = model.predict(xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
GNB_acc=accuracy_score(ytest,ypred)
print("Accuracy is:",GNB_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.5 Decision Tree Classifier

In [None]:
# Fit a decision tree on the training split and predict the test split.
from sklearn.tree import DecisionTreeClassifier
# Fixed seed (same value as the train/test split) so the tree, and therefore
# the reported accuracy, is identical on every Restart & Run All.
model=DecisionTreeClassifier(random_state=4)
model.fit(xtrain,ytrain)
ypred=model.predict(xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
Dt_acc=accuracy_score(ytest,ypred)
print("Accuracy is:",Dt_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.6 Random Forest Classifier

In [None]:
# Fit a 20-tree random forest on the training split and predict the test split.
from sklearn.ensemble import RandomForestClassifier
# Fixed seed (same value as the train/test split) so the bootstrap samples and
# feature subsets, and therefore the reported accuracy, are reproducible.
model=RandomForestClassifier(n_estimators=20,random_state=4)
model.fit(xtrain,ytrain)
ypred=model.predict(xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
RFC_acc=accuracy_score(ytest,ypred)
print("Accuracy is:",RFC_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.7 Gradient Boosting Classifier

In [None]:
# Fit a gradient-boosting classifier on the training split and predict the test split.
from sklearn.ensemble import GradientBoostingClassifier
# Fixed seed (same value as the train/test split) so the reported accuracy
# is identical on every Restart & Run All.
model=GradientBoostingClassifier(random_state=4)
model.fit(xtrain,ytrain)
ypred=model.predict(xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
GBC_acc=accuracy_score(ytest,ypred)
print("Accuracy is:",GBC_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.8 AdaBoost Classifier

In [None]:
# Fit a 5-estimator AdaBoost classifier on the training split and predict the test split.
from sklearn.ensemble import AdaBoostClassifier
# Fixed seed (same value as the train/test split) so the reported accuracy
# is identical on every Restart & Run All.
model=AdaBoostClassifier(n_estimators=5,random_state=4)
model.fit(xtrain,ytrain)
ypred=model.predict(xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
ABC_acc=accuracy_score(ytest,ypred)
print("Accuracy is:",ABC_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.9 XG Boost Classifier

In [None]:
# Fit an XGBoost classifier on the training split and predict the test split.
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
# Recent XGBoost releases require the class labels to be the integers
# 0..n_classes-1 and raise a ValueError otherwise. Encode the Type labels for
# training and map the predictions back, so ypred stays in the original label
# space used by ytest in the evaluation cell.
label_encoder=LabelEncoder()
model=XGBClassifier()
model.fit(xtrain,label_encoder.fit_transform(ytrain))
ypred=label_encoder.inverse_transform(model.predict(xtest))

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
XGB_acc=accuracy_score(ytest,ypred)
print("Accuracy is:",XGB_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

## 4.10 Voting

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
# (name, estimator) pairs combined by the voting classifier in the next cell.
# The stochastic members get a fixed seed (same value as the train/test split)
# so the ensemble's accuracy is reproducible; SVC needs it because
# probability=True fits its probability estimates with internal shuffling.
models=[
    # NOTE(review): logistic regression is left disabled as in the original; the reason is not visible here
    #("lr",LogisticRegression()),
    ("knn",KNeighborsClassifier(n_neighbors=5)),
    ("GNB",GaussianNB()),
    ("RF",RandomForestClassifier(n_estimators=50,random_state=4)),
    ('ABC',AdaBoostClassifier(n_estimators=50,random_state=4)),
    ('GBC',GradientBoostingClassifier(n_estimators=50,random_state=4)),
    ('SVM',SVC(C=0.1,probability=True,random_state=4))
]

In [None]:
# # training and fitting model
# Combine the estimators listed in `models` by soft voting,
# fit the ensemble on the training split and predict the test split
from sklearn.ensemble import VotingClassifier
model = VotingClassifier(estimators=models, voting="soft").fit(xtrain, ytrain)
ypred = model.predict(xtest)

In [None]:
# Evaluation of Model: accuracy, confusion-matrix heatmap and per-class report
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
voting_acc=accuracy_score(ytest,ypred)
print("Accuracy is:",voting_acc)
# Pin the class order so the heatmap ticks show the real class values
# instead of positional indices 0..k-1.
class_labels=np.union1d(ytest,ypred)
cm=confusion_matrix(ytest,ypred,labels=class_labels)
# fmt='d' prints the counts as plain integers
sns.heatmap(cm,annot=True,fmt='d',xticklabels=class_labels,yticklabels=class_labels)
print(classification_report(ytest,ypred))
plt.ylabel('actual label')
plt.xlabel('predicted label')
plt.show()

# 5. Accuracy of the Different Models

In [None]:
# (model name, test accuracy) rows for the comparison table and bar chart.
# Note: this rebinds `models`, which held the voting estimators until now.
# The misspelled display labels "GuessinNB" and "RanodmForest" are corrected.
models=[
    ("LogisticRegression",lg_acc),
    ("KNeighborsClassifier",knn_acc),
    ("SVM",svc_acc),
    ("DecisionTree",Dt_acc),
    ("GaussianNB",GNB_acc),
    ("RandomForest",RFC_acc),
    ("Gradientboost",GBC_acc),
    ("AdaBoost",ABC_acc),
    ("XGboost",XGB_acc),
    ("Voting",voting_acc)
]

In [None]:
# Tabulate the (model name, accuracy) rows and display the table
predict = pd.DataFrame(models, columns=["Model", "Accuracy"])
predict

In [None]:
# Plot a horizontal bar graph of the accuracy of each model, best model first
f, axe = plt.subplots(1,1, figsize=(15,10))
# Rebind instead of sorting with inplace=True; `predict` still ends up sorted
predict = predict.sort_values(by=['Accuracy'], ascending=False)

sns.barplot(x='Accuracy', y='Model', data = predict, ax = axe)
# Accuracy is a fraction, so tick the axis from 0 to 1 in steps of 0.1
axe.set_xticks(np.arange(0, 1.1, 0.1))
axe.set_title('Test accuracy by model')
plt.show()

### This graph shows that the XGBoost model gives higher accuracy than the other models.

# 6. Conclusion
In this activity I explored different classification models, then visualized and measured the performance of each model.

<center>****** END ******</center>