# In [None]:
# ==============================================================
# EXPERIMENT 3: Application of DL Framework for Classification
# Dataset: Fashion-MNIST (Kaggle)
# Framework: TensorFlow + Keras
# ==============================================================

# ----------------------------
# 1. Import Required Libraries
# ----------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.utils import to_categorical, plot_model
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize

# ----------------------------
# 2. Load Dataset
# ----------------------------
# Both CSV files are looked up inside the EXP_3_DATASET directory, relative
# to the current working directory.
train_path = "EXP_3_DATASET/fashion-mnist_train.csv"
test_path = "EXP_3_DATASET/fashion-mnist_test.csv"

# Read the training file first, then the test file, each into its own DataFrame.
train_df, test_df = (pd.read_csv(csv_file) for csv_file in (train_path, test_path))

# Print (rows, columns) of each split as a quick sanity check.
for caption, frame in (
    ("Training Data Shape:", train_df),
    ("Testing Data Shape:", test_df),
):
    print(caption, frame.shape)

# ----------------------------
# 3. Data Preprocessing
# ----------------------------

# Separate features and labels
# Number of target classes used for one-hot encoding below.
NUM_CLASSES = 10

# Separate features and labels.
# Features are cast to float32 up front: dividing the raw pixel matrix by
# 255.0 directly would yield float64 arrays, doubling memory use without
# any benefit for training (Keras' default float type is float32).
X_train_full = train_df.drop("label", axis=1).values.astype(np.float32)
y_train_full = train_df["label"].values

X_test = test_df.drop("label", axis=1).values.astype(np.float32)
y_test = test_df["label"].values

# Normalize pixel values (0-255 -> 0-1).
# In-place division is safe here because astype() returned fresh copies.
X_train_full /= 255.0
X_test /= 255.0

# Split into training and validation sets (80% / 20%).
# `stratify` keeps the per-class proportions the same in both splits, so the
# validation metrics are not skewed by an unlucky class mix.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.2,
    random_state=42,
    stratify=y_train_full,
)

# One-hot encode labels (required by the categorical cross-entropy loss).
y_train_cat = to_categorical(y_train, num_classes=NUM_CLASSES)
y_val_cat = to_categorical(y_val, num_classes=NUM_CLASSES)
y_test_cat = to_categorical(y_test, num_classes=NUM_CLASSES)

print("Training Set:", X_train.shape, y_train_cat.shape)
print("Validation Set:", X_val.shape, y_val_cat.shape)
print("Test Set:", X_test.shape, y_test_cat.shape)

# ----------------------------
# 4. EDA: Visualizing Some Images
# ----------------------------
# Show the first five training samples side by side, each titled with its
# integer label.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(12, 4))
for i, ax in enumerate(axes):
    # Each sample is a flat row; reshape it to 28x28 for display.
    sample_image = X_train[i].reshape(28, 28)
    ax.imshow(sample_image, cmap="gray")
    ax.set_title(f"Label: {y_train[i]}")
    ax.set_axis_off()
fig.suptitle("Sample Images from Fashion-MNIST Dataset")
plt.show()

# ----------------------------
# 5. Build Sequential Neural Network
# ----------------------------
# Fully connected classifier over flat 784-feature rows:
#   Input(784) -> Flatten -> Dense(256, relu) -> Dropout(0.3)
#              -> Dense(128, relu) -> Dense(10, softmax)
# The input shape is declared with an explicit Input object; passing
# `input_shape=` to the first layer is deprecated in recent Keras releases
# and triggers a warning there.
model = Sequential([
    tf.keras.Input(shape=(784,)),
    Flatten(),  # no-op on already-flat rows; kept so the layer stack is unchanged
    Dense(256, activation="relu"),
    Dropout(0.3),
    Dense(128, activation="relu"),
    Dense(10, activation="softmax"),
])

# Visualize Model Architecture.
# plot_model relies on the optional pydot / Graphviz packages. The diagram is
# a nice-to-have artifact, so a missing renderer must not abort the run
# before training: report it and carry on.
try:
    plot_model(
        model,
        to_file="EXP_3_MODEL_ARCHITECTURE.png",
        show_shapes=True,
        show_layer_names=True,
    )
except ImportError as exc:
    print(f"Skipping model architecture plot: {exc}")

# ----------------------------
# 6. Compile the Model
# ----------------------------
# Adam optimizer with categorical cross-entropy (targets are one-hot encoded);
# accuracy is tracked so it can be plotted afterwards.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# ----------------------------
# 7. Train the Model
# ----------------------------
# Train for 20 epochs with mini-batches of 64 samples; the validation split
# is passed along so `history` also records val_loss / val_accuracy.
validation_pair = (X_val, y_val_cat)
history = model.fit(
    x=X_train,
    y=y_train_cat,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=validation_pair,
)

# ----------------------------
# 8. Plot Accuracy and Loss Curves
# ----------------------------
# One figure with two side-by-side panels, drawn from the per-epoch values
# recorded in `history.history`.
curves_fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))
epoch_log = history.history

# Left panel: accuracy on the training and validation data.
acc_ax.plot(epoch_log['accuracy'], label="Train Accuracy")
acc_ax.plot(epoch_log['val_accuracy'], label="Validation Accuracy")
acc_ax.set_xlabel("Epochs")
acc_ax.set_ylabel("Accuracy")
acc_ax.set_title("Training vs Validation Accuracy")
acc_ax.legend()
acc_ax.grid()

# Right panel: loss on the training and validation data.
loss_ax.plot(epoch_log['loss'], label="Train Loss")
loss_ax.plot(epoch_log['val_loss'], label="Validation Loss")
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
loss_ax.set_title("Training vs Validation Loss")
loss_ax.legend()
loss_ax.grid()

plt.show()

# ----------------------------
# 9. Evaluate Model on Test Set
# ----------------------------
# Score the trained model on the test data; evaluate() yields the loss
# followed by the single compiled metric (accuracy).
test_scores = model.evaluate(X_test, y_test_cat, verbose=0)
test_loss, test_acc = test_scores
print(f"\nTest Accuracy: {test_acc*100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

# ----------------------------
# 10. Confusion Matrix
# ----------------------------
# Run inference on the test set once. The class-probability matrix is reused
# for the hard predictions (confusion matrix / report) and for the ROC
# curves, instead of calling model.predict() a second time on the same data.
y_score = model.predict(X_test)
y_pred = np.argmax(y_score, axis=1)  # most probable class per sample
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

# ----------------------------
# 11. Classification Report
# ----------------------------
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

# ----------------------------
# 12. ROC Curve (One-vs-Rest)
# ----------------------------
# Binarize the integer labels so each class gets its own "this class vs.
# all others" indicator column, matching the columns of y_score.
y_test_bin = label_binarize(y_test, classes=np.arange(10))

# Per-class false/true positive rates and the resulting area under the curve,
# keyed by class index.
fpr = {}
tpr = {}
roc_auc = {}
for i in range(10):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

plt.figure(figsize=(10, 7))
for i in range(10):
    plt.plot(fpr[i], tpr[i], label=f"Class {i} (AUC={roc_auc[i]:.2f})")
plt.plot([0, 1], [0, 1], "k--")  # chance-level diagonal for reference
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve (Multi-Class)")
plt.legend()
plt.show()
