# Glass Type Classification using KNN and SVM

This notebook demonstrates glass type classification based on chemical composition using:
- K-Nearest Neighbors (KNN)
- Support Vector Machine (SVM)

## Dataset
The dataset contains information about different types of glass based on their chemical properties:
- **Features**: RI (Refractive Index), Na, Mg, Al, Si, K, Ca, Ba, Fe (chemical compositions)
- **Target**: Type (1-7, representing different glass types)

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import warnings
warnings.filterwarnings('ignore')

# Global plot appearance: seaborn grid theme and the default figure size
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 2. Load and Explore Data

In [None]:
# Load the dataset
df = pd.read_csv('glass.csv')

# Display first few rows
print("First 5 rows of the dataset:")
print(df.head())

# Display dataset information.
# DataFrame.info() writes its report directly to stdout and returns None, so
# it is called on its own; wrapping it in print() would append a stray "None".
print("\nDataset Information:")
df.info()

# Display statistical summary
print("\nStatistical Summary:")
print(df.describe())

In [None]:
# Number of null entries found in each column
missing_per_column = df.isna().sum()
print("Missing values:")
print(missing_per_column)

# Number of samples per glass type, ordered by type label
samples_per_type = df['Type'].value_counts().sort_index()
print("\nClass distribution:")
print(samples_per_type)

## 3. Data Visualization

In [None]:
# Bar chart of the sample count for each glass type, ordered by type label
type_counts = df['Type'].value_counts().sort_index()

plt.figure(figsize=(10, 6))
type_counts.plot.bar(color='steelblue')
plt.title('Distribution of Glass Types', fontsize=16, fontweight='bold')
plt.xlabel('Glass Type', fontsize=12)
plt.ylabel('Count', fontsize=12)
# Keep the type labels horizontal
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlations between every column except 'Id'
# (the 'Type' column is not dropped, so it appears in the matrix as well)
correlation_matrix = df.drop(columns='Id').corr()

plt.figure(figsize=(12, 10))
sns.heatmap(
    data=correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
)
plt.title('Correlation Matrix of Features', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

## 4. Data Preprocessing

In [None]:
# Features are every column except 'Id' and 'Type'; 'Type' is the target
X = df.drop(columns=['Id', 'Type'])
y = df['Type']

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")

# Hold out 20% of the rows for testing, stratified on the target so both
# splits keep the class proportions; the seed is fixed for repeatability
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print(f"\nTraining set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")

In [None]:
# Standardize the features. The scaler learns its statistics from the
# training split only and is then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Sanity check: per-feature mean and standard deviation of the scaled training data
train_means = X_train_scaled.mean(axis=0).round(2)
train_stds = X_train_scaled.std(axis=0).round(2)

print("Feature scaling completed!")
print(f"\nScaled training data shape: {X_train_scaled.shape}")
print(f"Mean of scaled features: {train_means}")
print(f"Std of scaled features: {train_stds}")

## 5. Model Training - K-Nearest Neighbors (KNN)

In [None]:
# Candidate neighbourhood sizes to evaluate
k_values = range(1, 21)

# Mean 5-fold cross-validated accuracy on the scaled training data, one entry per K
cv_scores = [
    cross_val_score(
        KNeighborsClassifier(n_neighbors=k),
        X_train_scaled,
        y_train,
        cv=5,
        scoring='accuracy',
    ).mean()
    for k in k_values
]

# Accuracy as a function of K
plt.figure(figsize=(12, 6))
plt.plot(
    k_values,
    cv_scores,
    marker='o',
    linestyle='-',
    color='blue',
    linewidth=2,
    markersize=8,
)
plt.xlabel('K Value', fontsize=12)
plt.ylabel('Cross-Validation Accuracy', fontsize=12)
plt.title('KNN: K Value vs Cross-Validation Accuracy', fontsize=16, fontweight='bold')
plt.xticks(k_values)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Highest mean accuracy and the first (smallest) K that reaches it
best_cv_score = max(cv_scores)
optimal_k = k_values[cv_scores.index(best_cv_score)]
print(f"\nOptimal K value: {optimal_k}")
print(f"Best Cross-Validation Accuracy: {best_cv_score:.4f}")

In [None]:
# Fit KNN on the scaled training data with the K selected by cross-validation
knn_model = KNeighborsClassifier(n_neighbors=optimal_k).fit(X_train_scaled, y_train)

# Predict the held-out test samples
y_pred_knn = knn_model.predict(X_test_scaled)

# Test accuracy plus per-class precision / recall / F1
knn_accuracy = accuracy_score(y_test, y_pred_knn)
knn_report = classification_report(y_test, y_pred_knn)

print(f"KNN Model Accuracy: {knn_accuracy:.4f}")
print("\nClassification Report (KNN):")
print(knn_report)

In [None]:
# Confusion Matrix for KNN
# The class labels the model was trained on define both the matrix layout and
# the tick labels. Without explicit tick labels the heatmap axes would show
# positions 0..n-1 instead of the actual glass type numbers.
knn_class_labels = knn_model.classes_
cm_knn = confusion_matrix(y_test, y_pred_knn, labels=knn_class_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_knn,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=True,
    xticklabels=knn_class_labels,
    yticklabels=knn_class_labels,
)
plt.title('KNN Confusion Matrix', fontsize=16, fontweight='bold')
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.tight_layout()
plt.show()

## 6. Model Training - Support Vector Machine (SVM)

In [None]:
# Compare SVM kernels using 5-fold cross-validation on the training split.
# The held-out test set is deliberately not used here: choosing the kernel by
# test accuracy would leak test information into model selection and make the
# final test score optimistic.
kernels = ['linear', 'rbf', 'poly']
svm_results = {}

for kernel in kernels:
    svm = SVC(kernel=kernel, random_state=42)
    kernel_cv_scores = cross_val_score(svm, X_train_scaled, y_train, cv=5, scoring='accuracy')
    # Store the mean cross-validated accuracy for this kernel
    svm_results[kernel] = kernel_cv_scores.mean()
    print(f"SVM with {kernel} kernel - CV Accuracy: {svm_results[kernel]:.4f}")

# Find best kernel (on a tie, the kernel listed first wins)
best_kernel = max(svm_results, key=svm_results.get)
print(f"\nBest SVM kernel: {best_kernel}")

In [None]:
# Tune the SVM that uses the selected kernel with a 5-fold grid search.
# gamma is a coefficient of the non-linear kernels only, so it is searched
# only when the chosen kernel is not 'linear'; otherwise every gamma value
# would repeat the same fit and the reported "best gamma" would be meaningless.
param_grid = {'C': [0.1, 1, 10, 100]}
if best_kernel != 'linear':
    param_grid['gamma'] = ['scale', 'auto', 0.001, 0.01, 0.1, 1]

svm_grid = GridSearchCV(SVC(kernel=best_kernel, random_state=42), param_grid, cv=5, scoring='accuracy', n_jobs=-1)
svm_grid.fit(X_train_scaled, y_train)

print(f"Best parameters: {svm_grid.best_params_}")
print(f"Best cross-validation accuracy: {svm_grid.best_score_:.4f}")

# Use best model (already refit on the full training split by GridSearchCV's default refit=True)
svm_model = svm_grid.best_estimator_
y_pred_svm = svm_model.predict(X_test_scaled)

# Evaluate SVM model on the held-out test split
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print(f"\nSVM Model Accuracy: {svm_accuracy:.4f}")
print("\nClassification Report (SVM):")
print(classification_report(y_test, y_pred_svm))

In [None]:
# Confusion Matrix for SVM
# The class labels the model was trained on define both the matrix layout and
# the tick labels. Without explicit tick labels the heatmap axes would show
# positions 0..n-1 instead of the actual glass type numbers.
svm_class_labels = svm_model.classes_
cm_svm = confusion_matrix(y_test, y_pred_svm, labels=svm_class_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm_svm,
    annot=True,
    fmt='d',
    cmap='Greens',
    cbar=True,
    xticklabels=svm_class_labels,
    yticklabels=svm_class_labels,
)
plt.title('SVM Confusion Matrix', fontsize=16, fontweight='bold')
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.tight_layout()
plt.show()

## 7. Model Comparison

In [None]:
# Compare the test-set accuracy of the two final models
models = ['KNN', 'SVM']
accuracies = [knn_accuracy, svm_accuracy]

plt.figure(figsize=(10, 6))
bars = plt.bar(models, accuracies, color=['steelblue', 'forestgreen'], alpha=0.8, edgecolor='black', linewidth=1.5)
plt.ylabel('Accuracy', fontsize=12)
plt.title('Model Comparison: KNN vs SVM', fontsize=16, fontweight='bold')
# Accuracy is a fraction, so fix the axis to the full 0-1 range
plt.ylim(0, 1)

# Add accuracy values on bars
for bar, acc in zip(bars, accuracies):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height,
             f'{acc:.4f}',
             ha='center', va='bottom', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

# Name the winner, reporting an exact tie explicitly instead of silently
# crediting one of the models
if knn_accuracy > svm_accuracy:
    best_model_name = 'KNN'
elif svm_accuracy > knn_accuracy:
    best_model_name = 'SVM'
else:
    best_model_name = 'Tie (KNN and SVM)'

print("\nModel Performance Summary:")
print(f"KNN Accuracy: {knn_accuracy:.4f}")
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print(f"\nBest Model: {best_model_name}")

## 8. Interactive Glass Type Classifier

Use the sliders below to input chemical properties and predict the glass type!

In [None]:
from ipywidgets import interact, FloatSlider, Output
from IPython.display import display, HTML

# Human-readable description for each glass type label.
# NOTE(review): label 4 has no entry — presumably it never occurs in the data;
# confirm against the class distribution printed earlier.
glass_types = dict([
    (1, "Building Windows (Float Processed)"),
    (2, "Building Windows (Non-Float Processed)"),
    (3, "Vehicle Windows (Float Processed)"),
    (5, "Containers"),
    (6, "Tableware"),
    (7, "Headlamps"),
])

# Widget area that predict_glass_type renders its results into
output = Output()

def predict_glass_type(RI, Na, Mg, Al, Si, K, Ca, Ba, Fe):
    """Predict the glass type for one composition with both trained models.

    The nine arguments are placed, in signature order, into a single-row
    feature matrix, scaled with the fitted ``scaler`` and passed to
    ``knn_model`` and ``svm_model``. The predictions are rendered as HTML
    inside the ``output`` widget; nothing is returned.

    Parameters
    ----------
    RI, Na, Mg, Al, Si, K, Ca, Ba, Fe : float
        Feature values of one sample. They are assumed to be in the same
        order as the columns the scaler was fitted on — TODO confirm against
        the column order of the training data.
    """
    with output:
        # wait=True postpones clearing until new content is ready, which
        # avoids the results area flickering on every slider change
        output.clear_output(wait=True)

        # Create feature array
        features = np.array([[RI, Na, Mg, Al, Si, K, Ca, Ba, Fe]])

        # If the scaler was fitted on named columns, hand it the same names so
        # the input matches what it saw during fit (avoids the feature-name
        # mismatch that scikit-learn warns about)
        feature_names = getattr(scaler, 'feature_names_in_', None)
        if feature_names is not None:
            features = pd.DataFrame(features, columns=feature_names)

        # Scale features
        features_scaled = scaler.transform(features)

        # Make predictions using both models
        knn_pred = knn_model.predict(features_scaled)[0]
        svm_pred = svm_model.predict(features_scaled)[0]

        # Display results
        display(HTML('<h3 style="color: #2E86AB;">Prediction Results</h3>'))
        display(HTML('<hr style="border: 1px solid #2E86AB;">'))

        display(HTML(f'<p style="font-size: 16px;"><strong>KNN Prediction:</strong> Type {knn_pred} - {glass_types.get(knn_pred, "Unknown")}</p>'))
        display(HTML(f'<p style="font-size: 16px;"><strong>SVM Prediction:</strong> Type {svm_pred} - {glass_types.get(svm_pred, "Unknown")}</p>'))

        # Highlight whether the two models reached the same answer
        if knn_pred == svm_pred:
            display(HTML(f'<p style="color: green; font-size: 18px; font-weight: bold;">✓ Both models agree: Type {knn_pred}</p>'))
        else:
            display(HTML(f'<p style="color: orange; font-size: 18px; font-weight: bold;">⚠ Models disagree - KNN: Type {knn_pred}, SVM: Type {svm_pred}</p>'))

# Banner printed above the sliders
banner_rule = "=" * 80
print("\n" + banner_rule)
print("INTERACTIVE GLASS TYPE CLASSIFIER")
print(banner_rule)
print("\nAdjust the sliders below to input chemical composition values:")
print("\n")

# (initial value, minimum, maximum, step) for each slider, keyed by the
# predict_glass_type argument it feeds and listed in that function's order
slider_specs = {
    'RI': (1.52, 1.51, 1.54, 0.001),
    'Na': (13.0, 10.0, 17.0, 0.1),
    'Mg': (3.5, 0.0, 4.5, 0.1),
    'Al': (1.2, 0.0, 3.5, 0.1),
    'Si': (72.5, 69.0, 75.0, 0.1),
    'K': (0.5, 0.0, 2.0, 0.05),
    'Ca': (8.5, 5.0, 16.0, 0.1),
    'Ba': (0.0, 0.0, 3.5, 0.05),
    'Fe': (0.0, 0.0, 0.5, 0.01),
}

# continuous_update=False: the callback fires when a slider is released,
# not on every intermediate position while dragging
sliders = {
    name: FloatSlider(value=initial, min=low, max=high, step=step,
                      description=f'{name}:', continuous_update=False)
    for name, (initial, low, high, step) in slider_specs.items()
}

# Wire the sliders to the prediction callback
interact(predict_glass_type, **sliders)

# Show the area the callback writes its results into
display(output)

## 9. Sample Predictions

Let's test the classifier with some sample data points from our test set:

In [None]:
# Make predictions on sample test data
n_samples = 5

# A seeded generator draws the same rows on every run, so the cell is
# reproducible. The draw is capped at the test-set size so it cannot fail
# when fewer than n_samples rows are available.
sample_rng = np.random.default_rng(42)
sample_indices = sample_rng.choice(X_test.index, size=min(n_samples, len(X_test)), replace=False)

print("\nSample Predictions:")
print("="*100)

for idx in sample_indices:
    # Explicit label-based lookup of the true class for this row
    true_type = y_test.loc[idx]
    # Double brackets keep a one-row DataFrame, so the scaler receives the
    # same named columns it was fitted on
    features = X_test.loc[[idx]]
    features_scaled = scaler.transform(features)

    knn_pred = knn_model.predict(features_scaled)[0]
    svm_pred = svm_model.predict(features_scaled)[0]

    print(f"\nSample ID: {idx}")
    print(f"True Type: {true_type} - {glass_types.get(true_type, 'Unknown')}")
    print(f"KNN Prediction: {knn_pred} - {glass_types.get(knn_pred, 'Unknown')} {'✓' if knn_pred == true_type else '✗'}")
    print(f"SVM Prediction: {svm_pred} - {glass_types.get(svm_pred, 'Unknown')} {'✓' if svm_pred == true_type else '✗'}")
    print("-"*100)

## 10. Conclusion

This notebook demonstrated:
1. ✓ Data loading and preprocessing
2. ✓ Exploratory data analysis with visualizations
3. ✓ Train-test split and feature scaling (scaler fitted on the training split only)
4. ✓ KNN model training with hyperparameter tuning
5. ✓ SVM model training with kernel selection and hyperparameter optimization
6. ✓ Model evaluation and comparison
7. ✓ Interactive classifier with chemical property sliders

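How well each model classifies glass types from chemical composition is shown by the test accuracies, classification reports, and confusion matrices computed above; refer to those figures rather than a single summary statement when comparing KNN and SVM. The interactive classifier allows users to input custom chemical properties and get predictions from both models as the sliders are adjusted.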