# Fashion MNIST Classifier - Enhanced Notebook

This notebook demonstrates an improved approach to the Fashion MNIST classification problem with:
- Data normalization
- Validation split
- Training callbacks
- Comprehensive visualizations
- Model evaluation

## 1. Setup and Imports

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Record the runtime environment in the notebook output: the TensorFlow
# version in use and the GPU devices TensorFlow reports (an empty list
# means none were found).
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {gpu_devices}")

## 2. Load and Explore Data

In [None]:
# Fetch the Fashion MNIST train/test split through the Keras dataset helper.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Human-readable class names, one per label.
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

# Summarise array shapes, the number of classes and the raw pixel range.
for split_name, split_array in (
    ("Training images", train_images),
    ("Training labels", train_labels),
    ("Test images", test_images),
    ("Test labels", test_labels),
):
    print(f"{split_name} shape: {split_array.shape}")
print(f"Number of classes: {len(class_names)}")
lowest_pixel, highest_pixel = train_images.min(), train_images.max()
print(f"Pixel value range: [{lowest_pixel}, {highest_pixel}]")

In [None]:
# Preview the first 20 training images in a 4x5 grid, each titled with the
# class name looked up from its integer label.
preview_fig, preview_axes = plt.subplots(4, 5, figsize=(15, 6))
first_samples = zip(preview_axes.flat, train_images[:20], train_labels[:20])
for preview_ax, sample_image, sample_label in first_samples:
    preview_ax.imshow(sample_image, cmap='gray')
    preview_ax.set_title(class_names[sample_label], fontsize=10)
    preview_ax.axis('off')
preview_fig.suptitle('Sample Images from Fashion MNIST Dataset', fontsize=14, fontweight='bold')
preview_fig.tight_layout()
plt.show()

In [None]:
# Count how many training examples carry each label and draw the counts as a
# bar chart, one bar per class name.
unique, counts = np.unique(train_labels, return_counts=True)

dist_fig, dist_ax = plt.subplots(figsize=(12, 5))
dist_ax.bar(class_names, counts, color='skyblue', edgecolor='black')
dist_ax.set_xlabel('Class', fontsize=12)
dist_ax.set_ylabel('Count', fontsize=12)
dist_ax.set_title('Class Distribution in Training Set', fontsize=14, fontweight='bold')
# Tilt the class names so the longer ones do not overlap (acts on the current axes).
plt.xticks(rotation=45, ha='right')
dist_ax.grid(axis='y', alpha=0.3)
dist_fig.tight_layout()
plt.show()

## 3. Data Preprocessing

In [None]:
# Normalize pixel values to [0, 1] and add the channel dimension for the CNN.
#
# This cell overwrites train_images/test_images, so both steps are guarded:
# re-running the cell must not divide by 255 a second time or append a second
# channel axis.
def _prepare_images(images):
    """Return `images` as float32 scaled to [0, 1] with a trailing channel axis.

    Args:
        images: array of images, either raw integer pixels with shape
            (N, H, W) or the output of a previous call to this function.

    Returns:
        A NumPy array. Integer input is cast to float32 and divided by 255;
        3-D input gains a trailing axis of size 1. Input that is already
        floating point and/or already 4-D is passed through unchanged for
        the corresponding step, which makes the function idempotent.
    """
    prepared = np.asarray(images)
    # Raw pixels are expected to be integers; once scaled the array is
    # float32, so the dtype tells us whether scaling has already happened.
    if np.issubdtype(prepared.dtype, np.integer):
        prepared = prepared.astype('float32') / 255.0
    # (N, H, W) -> (N, H, W, 1); skip if the channel axis is already there.
    if prepared.ndim == 3:
        prepared = np.expand_dims(prepared, axis=-1)
    return prepared


train_images = _prepare_images(train_images)
test_images = _prepare_images(test_images)

print(f"Normalized training images shape: {train_images.shape}")
print(f"Pixel value range after normalization: [{train_images.min():.2f}, {train_images.max():.2f}]")

## 4. Build Improved CNN Model

In [None]:
# Create CNN model with batch normalization and dropout.
#
# The input shape is declared with an explicit `tf.keras.Input` as the first
# entry rather than via `input_shape=` on the first Conv2D: passing
# `input_shape` to a layer is deprecated for Sequential models in Keras 3 and
# triggers a warning there. The layer stack itself is unchanged.
model = tf.keras.Sequential([
    # 28x28 single-channel images
    tf.keras.Input(shape=(28, 28, 1)),

    # First convolutional block: two 32-filter 3x3 convs, then 2x2 pooling
    tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.3),

    # Second convolutional block: two 64-filter 3x3 convs, then 2x2 pooling
    tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.3),

    # Third convolutional block: a single 128-filter conv, no pooling
    tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    # Dense layers
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    # No activation on the final layer: the 10 outputs are raw per-class scores.
    tf.keras.layers.Dense(10)
])

model.summary()

## 5. Compile Model

In [None]:
# Training configuration:
#   - Adam optimizer with a 1e-3 learning rate
#   - sparse categorical cross-entropy applied to logits (from_logits=True)
#   - plain accuracy plus top-3 accuracy as reported metrics
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
top3_metric = tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name='top_3_accuracy')

model.compile(
    optimizer=adam_optimizer,
    loss=logits_loss,
    metrics=['accuracy', top3_metric],
)

## 6. Setup Callbacks

In [None]:
# Stop once validation loss has not improved for 5 epochs and roll the model
# back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 3 epochs without validation-loss improvement,
# never going below 1e-7.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    verbose=1,
)

# Write the model to disk whenever validation accuracy reaches a new best.
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

callbacks = [early_stopping, lr_reducer, best_checkpoint]

## 7. Train Model

In [None]:
# Train for up to 30 epochs in mini-batches of 32, holding out 20% of the
# training data for validation; the callbacks may end training earlier.
fit_options = dict(
    batch_size=32,
    epochs=30,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(train_images, train_labels, **fit_options)

## 8. Visualize Training History

In [None]:
# Side-by-side training curves: loss on the left, accuracy on the right.
# Each panel overlays the training series and its `val_`-prefixed counterpart
# from the Keras history object.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

history_panels = (
    (ax1, 'loss', 'Loss'),
    (ax2, 'accuracy', 'Accuracy'),
)
for panel_ax, metric_key, metric_name in history_panels:
    panel_ax.plot(history.history[metric_key],
                  label=f'Training {metric_name}', linewidth=2)
    panel_ax.plot(history.history[f'val_{metric_key}'],
                  label=f'Validation {metric_name}', linewidth=2)
    panel_ax.set_xlabel('Epoch', fontsize=12)
    panel_ax.set_ylabel(metric_name, fontsize=12)
    panel_ax.set_title(f'Model {metric_name}', fontsize=14, fontweight='bold')
    panel_ax.legend(fontsize=10)
    panel_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 9. Evaluate on Test Set

In [None]:
# Score the model on the held-out test set. The three returned values follow
# the compile configuration: loss, accuracy, top-3 accuracy.
test_loss, test_accuracy, test_top3_acc = model.evaluate(test_images, test_labels, verbose=1)

# Print the results between two 60-character rules, with a blank line first.
rule = '=' * 60
summary_lines = [
    f"\n{rule}",
    f"Test Loss: {test_loss:.4f}",
    f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)",
    f"Test Top-3 Accuracy: {test_top3_acc:.4f} ({test_top3_acc*100:.2f}%)",
    f"{rule}",
]
print("\n".join(summary_lines))

## 10. Make Predictions

In [None]:
# Run the model over the whole test set, then take the index of the
# highest-scoring class along axis 1 as the predicted label for each image.
predictions = model.predict(test_images)
predicted_labels = predictions.argmax(axis=1)

## 11. Confusion Matrix

In [None]:
# Confusion matrix of true labels against predicted labels, drawn as an
# annotated heatmap with class names on both axes.
cm = confusion_matrix(test_labels, predicted_labels)

cm_fig, cm_ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # integer counts in each cell
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    cbar_kws={'label': 'Count'},
    ax=cm_ax,
)
cm_ax.set_xlabel('Predicted Label', fontsize=12)
cm_ax.set_ylabel('True Label', fontsize=12)
cm_ax.set_title('Confusion Matrix', fontsize=14, fontweight='bold')
# Tick-label rotation is applied through pyplot, i.e. to the current axes.
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
cm_fig.tight_layout()
plt.show()

## 12. Classification Report

In [None]:
# Print a banner followed by scikit-learn's per-class text report, labelled
# with the class names.
banner = "=" * 70
print("\n" + banner)
print("CLASSIFICATION REPORT")
print(banner)

report_text = classification_report(test_labels, predicted_labels, target_names=class_names)
print(report_text)

## 13. Visualize Sample Predictions

In [None]:
# Wrap the trained model with a Softmax layer so its outputs can be read as
# class probabilities, and score the first 25 test images.
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
probabilities = probability_model.predict(test_images[:25])

# 5x5 grid: one panel per image, titled with the true class, the predicted
# class and the probability assigned to the predicted class.
fig, axes = plt.subplots(5, 5, figsize=(15, 15))
axes = axes.flatten()

for i, (ax, img) in enumerate(zip(axes, test_images[:25])):
    true_label = test_labels[i]
    pred_label = predicted_labels[i]
    confidence = probabilities[i, pred_label]

    # Green title for a correct prediction, red for a wrong one.
    if true_label == pred_label:
        color = 'green'
    else:
        color = 'red'

    ax.imshow(img.squeeze(), cmap='gray')  # drop the channel axis for display
    ax.set_title(
        f"True: {class_names[true_label]}\nPred: {class_names[pred_label]}\n({confidence:.2%})",
        color=color,
        fontsize=9,
    )
    ax.axis('off')

fig.suptitle('Sample Predictions (Green=Correct, Red=Wrong)',
             fontsize=14, fontweight='bold', y=0.995)
fig.tight_layout()
plt.show()

## 14. Detailed Prediction Example

In [None]:
# Pick one test image at random (unseeded, so it differs from run to run).
idx = np.random.randint(0, len(test_images))
image, true_label = test_images[idx], test_labels[idx]

# Score it as a batch of one and keep the single row of class probabilities.
pred_probs = probability_model.predict(image[np.newaxis, ...])[0]
pred_label = pred_probs.argmax()

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: the image, titled green when the prediction is right, red otherwise.
color = 'red' if pred_label != true_label else 'green'
title_text = (
    f"True: {class_names[true_label]}\n"
    f"Predicted: {class_names[pred_label]}\n"
    f"Confidence: {pred_probs[pred_label]:.2%}"
)
ax1.imshow(image.squeeze(), cmap='gray')
ax1.set_title(title_text, color=color, fontsize=12, fontweight='bold')
ax1.axis('off')

# Right panel: horizontal bars of the per-class probabilities, with the
# predicted class highlighted in green.
indices = np.arange(len(class_names))
colors = ['skyblue'] * len(class_names)
colors[pred_label] = 'green'
ax2.barh(indices, pred_probs, color=colors)
ax2.set_yticks(indices)
ax2.set_yticklabels(class_names)
ax2.set_xlabel('Probability', fontsize=11)
ax2.set_title('Class Probabilities', fontsize=12, fontweight='bold')
ax2.set_xlim(0, 1)
ax2.grid(axis='x', alpha=0.3)

fig.tight_layout()
plt.show()

## 15. Save Model

In [None]:
# Persist the trained model under a relative path and confirm on stdout.
final_model_path = 'fashion_mnist_cnn_final.h5'
model.save(final_model_path)
print("Model saved successfully!")

## Summary

This enhanced notebook demonstrates:

✅ **Data Preprocessing**: Normalization and proper shape handling

✅ **Improved Architecture**: CNN with batch normalization and dropout

✅ **Training Callbacks**: Early stopping, learning rate reduction, and model checkpointing

✅ **Comprehensive Evaluation**: Test accuracy, confusion matrix, and classification report

✅ **Rich Visualizations**: Training history, predictions, and probability distributions

### Next Steps:

- Try different architectures (deeper networks, different filter sizes)
- Experiment with data augmentation
- Use the trained model with the API or Gradio interface
- Deploy using Docker containers