# Glass Classification

## Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import StandardScaler
#from scipy.stats import zscore
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay
# NOTE: plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in
# 1.2; drop this import once no cell calls it.
from sklearn.metrics import plot_confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


## Dataset Load

In [None]:

# Read the glass dataset from its CSV file and preview the first rows.
csv_path = '../input/glass/glass.csv'
data = pd.read_csv(csv_path)
data.head()


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
data.describe()

## Data Analysis

### Histograms

In [None]:
# One histogram per column, laid out on a grid that is three plots wide.
n_grid_cols = 3
# Ceiling division gives exactly as many rows as the columns need, so the
# figure height is not wasted on empty subplot rows.
n_grid_rows = max(1, -(-len(data.columns) // n_grid_cols))

fig = make_subplots(rows=n_grid_rows, cols=n_grid_cols)

for i, col in enumerate(data.columns):
    grid_row, grid_col = divmod(i, n_grid_cols)
    # Plotly subplot coordinates are 1-based.
    fig.add_trace(
        go.Histogram(x=data[col], name=col),
        row=grid_row + 1,
        col=grid_col + 1,
    )

fig.update_layout(height=1200)

fig.show()

### Outliers

In [None]:
# Columns whose extreme upper values are trimmed as outliers.
need_scaling = ["RI", "K", "Ca", "Ba", "Fe"]

# Compute every cut-off on the untrimmed data. Filtering column by column and
# re-taking the quantile each time would make the result depend on the order
# of the list, because later quantiles would be taken on already-shrunk data.
upper_limits = data[need_scaling].quantile(.99)

# Keep a row only if it lies strictly below the 99th percentile in every
# listed column.
within_limits = data[need_scaling].lt(upper_limits, axis="columns").all(axis=1)
data = data[within_limits]

### Scaling

In [None]:
# Separate the class label (Y) from the feature columns (X).
Y = data['Type']
X = data.drop(columns='Type')

In [None]:
# Standardise every feature to zero mean and unit variance.
scaler = StandardScaler()

# Rebuild the frame with X's own column labels and row index. Without
# index=X.index the new frame would get a fresh 0..n-1 index, and the label
# assignment below (which aligns on index) would attach labels to the wrong
# rows and leave NaN wherever earlier filtering removed rows.
data = pd.DataFrame(
    scaler.fit_transform(X),
    columns=X.columns,
    index=X.index,
)
data['Type'] = Y
data.describe()

In [None]:
def make_box_plots(frame=None, n_cols=3):
    """Show one box plot per column of a DataFrame on a subplot grid.

    Args:
        frame: DataFrame whose columns are plotted. When omitted, the
            notebook-level ``data`` frame is used.
        n_cols: Number of subplots per grid row.
    """
    if frame is None:
        frame = data

    columns = list(frame.columns)
    # Ceiling division: just enough rows to hold every column.
    n_rows = max(1, -(-len(columns) // n_cols))

    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        subplot_titles=[str(col) for col in columns],
    )

    for position, col in enumerate(columns):
        grid_row, grid_col = divmod(position, n_cols)
        # Plotly subplot coordinates are 1-based. add_trace is used because
        # Figure.append_trace is deprecated.
        fig.add_trace(
            go.Box(y=frame[col], name=str(col)),
            row=grid_row + 1,
            col=grid_col + 1,
        )

    fig.update_layout(
        autosize=False,
        width=1200,
        height=2000,
        margin=dict(
            l=50,
            r=50,
            b=100,
            t=100,
            pad=4,
        ),
        paper_bgcolor="LightSteelBlue",
    )
    fig.show()


make_box_plots()

### Correlation Matrix

In [None]:
# Pairwise correlations between all numeric columns of `data`.
corr = data.corr()
# Styler.format(precision=...) is used because Styler.set_precision was
# deprecated in pandas 1.3 and removed in pandas 2.0.
corr.style.background_gradient(cmap='plasma').format(precision=2)

In [None]:
# Rank every column by the absolute value of its correlation with Type,
# strongest first.
#data = data.drop(columns = ["Na", "Mg", "Al", "Si"])
corr["Type"].abs().sort_values(ascending=False)

## Multidimensional Plot

In [None]:
# Drop the columns that are not shown, then plot Mg / Al / Ba in 3-D with the
# points coloured by glass Type.
df = data.drop(["Na", "K", "Fe", "Si", "RI", "Ca"], axis=1)
fig = px.scatter_3d(
    df,
    x="Mg",
    y="Al",
    z="Ba",
    color="Type",
)
fig.show()

## Train/Test Split

In [None]:
# Hold out 30 rows for testing. A fixed random_state makes the split, and
# therefore every score and plot below, reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=30, random_state=42
)

# KNN is distance-based, so all features must share a common scale. The scaler
# is fitted on the training rows only so that no statistics from the test rows
# leak into the model; both splits stay DataFrames with their original column
# names and row labels.
feature_scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(
    feature_scaler.transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
X_test = pd.DataFrame(
    feature_scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

## KNN Model

In [None]:
# k-nearest-neighbours classifier with scikit-learn's default settings,
# fitted on the training split.
knn = KNeighborsClassifier()
knn.fit(X=X_train, y=y_train)

### Accuracy

In [None]:
# Report accuracy on the held-out test split. Scoring on the rows the model
# was fitted on would overstate how well it generalises.
score = knn.score(X_test, y_test)
score

### Confusion Matrix

In [None]:
# Confusion matrix of the KNN model on the held-out test split.
# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was deprecated in scikit-learn 1.0 and removed in 1.2.
ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)
plt.show()

### Prediction Contour

In [None]:
# Overlay the KNN predictions (contour) and the true labels (markers) in the
# Mg / Al plane. Coordinates are taken from X_train so that every x/y value
# belongs to the same row as the prediction and label drawn at that point.
fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Contour(
    x=X_train['Mg'],
    y=X_train['Al'],
    z=knn.predict(X_train),
    showscale=False,
    opacity=0.40,
    colorscale='portland'
), row=1, col=1)

fig.add_trace(go.Scatter(
    x=X_train['Mg'],
    y=X_train['Al'],
    text=y_train,
    mode='markers',
    marker_symbol=y_train,
    marker=dict(color=y_train, colorscale='portland')
), row=1, col=1)

fig.update_layout(showlegend=False)

fig.show()

## 

## Linear Discriminant Analysis

### Dimensionality Reduction

In [None]:
# Fit LDA on the training split and project the training features onto a
# single discriminant component.
lda = LinearDiscriminantAnalysis(n_components=1)
lda.fit(X_train, y_train)
# reshape(-1, 1) keeps the projection as a single-column 2-D array.
reduced_X = lda.transform(X_train).reshape(-1, 1)
reduced_X

### One-Dimensional KNN

In [None]:
# Second KNN classifier, fitted on the LDA-reduced training features.
knn2 = KNeighborsClassifier()
knn2.fit(X=reduced_X, y=y_train)

### Updated Confusion Matrix

In [None]:
# Project the held-out test rows with the fitted LDA so the reduced model is
# judged on unseen data rather than on the rows it was trained on.
reduced_X_test = lda.transform(X_test)

# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was deprecated in scikit-learn 1.0 and removed in 1.2.
ConfusionMatrixDisplay.from_estimator(knn2, reduced_X_test, y_test)
plt.show()

### Prediction Contour

In [None]:
# Overlay the reduced-model predictions (contour) and the true labels
# (markers) along the single LDA component.
fig = make_subplots(rows=1, cols=1)

# Plotly expects flat coordinate sequences; reduced_X is a single-column 2-D
# array, so flatten it once and reuse it for both traces.
lda_component = reduced_X.ravel()

fig.add_trace(go.Contour(
    x=lda_component,
    y=y_train,
    z=knn2.predict(reduced_X),
    showscale=False,
    opacity=0.40,
    colorscale='portland'
), row=1, col=1)

fig.add_trace(go.Scatter(
    x=lda_component,
    y=y_train,
    text=y_train,
    mode='markers',
    marker_symbol=y_train,
    marker=dict(color=y_train, colorscale='portland')
), row=1, col=1)

fig.update_layout(showlegend=False)

fig.show()