In [None]:
!pip install tensorflow
from tensorflow import keras

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()


In [None]:
# Sanity check: print (images shape, labels shape) for the train split, then the test split.
for split_images, split_labels in ((x_train, y_train), (x_test, y_test)):
    print(split_images.shape, split_labels.shape)

In [None]:
import matplotlib.pyplot as plt

# Show the first training image with its label; the gray colormap renders it in black & white.
fig, ax = plt.subplots()
ax.imshow(x_train[0], cmap="gray")
ax.set_title(f"Label: {y_train[0]}")
ax.axis("off")
plt.show()

In [None]:
# Preview the first num_images training samples on a 3x4 grid, each titled with its label.
num_images = 12
fig, axes = plt.subplots(3, 4, figsize=(10, 4))
for ax, image, label in zip(axes.flat, x_train[:num_images], y_train[:num_images]):
    ax.imshow(image, cmap="gray")
    ax.set_title(f"Label: {label}")
    ax.axis("off")
fig.tight_layout()
plt.show()


In [None]:
!pip install seaborn
!pip install numpy

import seaborn as sns
import numpy as np
unique, counts = np.unique(y_train, return_counts=True)
class_distribution = dict(zip(unique, counts))

# Create a bar plot
plt.figure(figsize=(10, 6))
sns.barplot(x=list(class_distribution.keys()), y=list(class_distribution.values()))
plt.title("Class Distribution in MNIST Dataset")
plt.xlabel("Digits")
plt.ylabel("Num of Samples")
plt.xticks(rotation=0) # force them to be horizontal
plt.show()

In [None]:
# Baseline: a small fully connected classifier, assembled layer by layer.
base_model = keras.Sequential()
base_model.add(keras.layers.Flatten(input_shape=(28, 28)))    # 28x28 image -> flat vector
base_model.add(keras.layers.Dense(128, activation="relu"))    # hidden layer
base_model.add(keras.layers.Dense(10, activation="softmax"))  # 10 output scores that sum to 1


In [None]:
# Scale pixel values to [0, 1]. This cell rebinds x_train / x_test, so the guard
# keeps it idempotent: re-running it must not divide by 255 a second time.
if x_train.max() > 1.0:
    x_train = x_train / 255.0
if x_test.max() > 1.0:
    x_test = x_test / 255.0

base_model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
# The test split is passed as validation data so that val_accuracy is recorded per epoch.
base_history = base_model.fit(
    x_train, y_train,
    epochs=5,
    validation_data=(x_test, y_test)
)


In [None]:
# Names of the metrics recorded during training.
print(base_history.history.keys())

# Accuracy per epoch for the baseline model.
base_metrics = base_history.history
fig, ax = plt.subplots()
ax.plot(base_metrics["accuracy"], label="Training Accuracy")
if "val_accuracy" in base_metrics:  # only plotted when validation accuracy was recorded
    ax.plot(base_metrics["val_accuracy"], label="Validation Accuracy")
ax.set_title("Model Accuracy Over Epochs")
ax.set_ylabel("Accuracy")
ax.set_xlabel("Epoch")
ax.legend()
plt.show()

In [None]:
# Small CNN: one convolution + pooling stage followed by a softmax classifier.
CNNmodel = keras.Sequential()
CNNmodel.add(keras.layers.Reshape((28, 28, 1), input_shape=(28, 28)))  # add the channel axis
CNNmodel.add(keras.layers.Conv2D(32, (3, 3), activation="relu"))       # 32 filters of size 3x3
CNNmodel.add(keras.layers.MaxPooling2D((2, 2)))
CNNmodel.add(keras.layers.Flatten())
CNNmodel.add(keras.layers.Dense(10, activation="softmax"))             # 10 output scores that sum to 1


In [None]:
# x_train / x_test are already divided by 255 in the baseline-training cell. Dividing
# unconditionally here would squash the inputs into [0, 1/255], so only rescale
# when the data is still in its raw 0-255 range.
if x_train.max() > 1.0:
    x_train = x_train / 255.0
if x_test.max() > 1.0:
    x_test = x_test / 255.0

CNNmodel.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
# The test split is passed as validation data so that val_accuracy is recorded per epoch.
CNN_history = CNNmodel.fit(
    x_train, y_train,
    epochs=5,
    validation_data=(x_test, y_test)
)

In [None]:
# Accuracy per epoch for the CNN.
cnn_metrics = CNN_history.history
fig, ax = plt.subplots()
ax.plot(cnn_metrics["accuracy"], label="Training Accuracy")
if "val_accuracy" in cnn_metrics:  # only plotted when validation accuracy was recorded
    ax.plot(cnn_metrics["val_accuracy"], label="Validation Accuracy")
ax.set_title("Model Accuracy Over Epochs")
ax.set_ylabel("Accuracy")
ax.set_xlabel("Epoch")
ax.legend()
plt.show()

What if we train them for longer — does that help?

In [None]:
# Fit the baseline model for 15 more epochs. fit() continues from the model's
# current weights, so these epochs come on top of any training already done.
base_history_lt = base_model.fit(
    x=x_train,
    y=y_train,
    epochs=15,
    validation_data=(x_test, y_test),
)

In [None]:
# Accuracy per epoch for the longer baseline run.
base_lt_metrics = base_history_lt.history
fig, ax = plt.subplots()
ax.plot(base_lt_metrics["accuracy"], label="Training Accuracy")
if "val_accuracy" in base_lt_metrics:  # only plotted when validation accuracy was recorded
    ax.plot(base_lt_metrics["val_accuracy"], label="Validation Accuracy")
ax.set_title("Model Accuracy Over Epochs")
ax.set_ylabel("Accuracy")
ax.set_xlabel("Epoch")
ax.legend()
plt.show()

In [None]:
# Fit the CNN for 15 more epochs. fit() continues from the model's current
# weights, so these epochs come on top of any training already done.
CNN_history_lt = CNNmodel.fit(
    x=x_train,
    y=y_train,
    epochs=15,
    validation_data=(x_test, y_test),  # needed to record val_accuracy
)

In [None]:
# Accuracy per epoch for the longer CNN run.
cnn_lt_metrics = CNN_history_lt.history
fig, ax = plt.subplots()
ax.plot(cnn_lt_metrics["accuracy"], label="Training Accuracy")
if "val_accuracy" in cnn_lt_metrics:  # only plotted when validation accuracy was recorded
    ax.plot(cnn_lt_metrics["val_accuracy"], label="Validation Accuracy")
ax.set_title("Model Accuracy Over Epochs")
ax.set_ylabel("Accuracy")
ax.set_xlabel("Epoch")
ax.legend()
plt.show()

Extra visualizations of how the model identifies the digits

In [None]:
# Take the first test image as the probe input and display it.
img = x_test[0]
fig, ax = plt.subplots()
ax.imshow(img.squeeze(), cmap="gray")
ax.set_title("Input Image")
ax.axis("off")
plt.show()

In [None]:
# Add a leading batch axis, run the CNN, and take the index of the highest score.
single_batch = np.expand_dims(img, axis=0)
class_scores = CNNmodel.predict(single_batch)
pred = np.argmax(class_scores)
print(f"Predicted class: {pred}")

In [None]:
# List every CNN layer with its position, to look up layer indices and names.
for position, cnn_layer in enumerate(CNNmodel.layers):
    print(position, cnn_layer.name)

In [None]:
import tensorflow as tf

# Showing intermediate outputs of the earlier Sequential model ran into errors, so
# the same architecture is rebuilt here with the functional API (explicit tensors)
# and the trained weights are copied over from CNNmodel.
inputs = tf.keras.Input(shape=(28, 28), name="input_layer")

# Feature-extraction layers, in the same order as in CNNmodel.
feature_layers = [
    tf.keras.layers.Reshape((28, 28, 1)),
    tf.keras.layers.Conv2D(
        32, (3, 3), activation="relu", padding="valid", name="conv1"
    ),  # "valid" padding, to match the original CNNmodel
    tf.keras.layers.MaxPooling2D((2, 2), name="pool1"),
    tf.keras.layers.Flatten(),
]
x = inputs
for feature_layer in feature_layers:
    x = feature_layer(x)
outputs = tf.keras.layers.Dense(10, activation="softmax")(x)

functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)
# Reuse the weights already learned by CNNmodel instead of training again.
functional_model.set_weights(CNNmodel.get_weights())


\* Apparently Sequential models cannot run the line `activation_model = tf.keras.Model(inputs=functional_model.input, outputs=layer_outputs)`,
because their tensors are "skipped" when the model is built; when the tensors are created explicitly, as I did here, it works well.

After some consideration I decided not to commit my earlier attempts: whatever I tried simply didn't work, so I gave up on that approach.

In [None]:
target_digit = 6
# Positions of all training samples labelled target_digit; the first one is the probe image.
indices = np.flatnonzero(y_train == target_digit)
img = x_train[indices[0]]

# Sub-model mapping the network input to the output of the "conv1" layer.
conv1_layer = functional_model.get_layer("conv1")
layer_outputs = [conv1_layer.output]
activation_model = tf.keras.Model(inputs=functional_model.input, outputs=layer_outputs)
probe_batch = np.expand_dims(img, axis=0)  # add the batch axis
activations = activation_model.predict(probe_batch, verbose=0)[0]

# A filter's "activity" is the sum of all values in its feature map.
activity_per_filter = activations.sum(axis=(0, 1))

# Filters ordered from most to least active; keep the 10 most active.
filters_by_activity = np.argsort(activity_per_filter)[::-1]
top_indices = filters_by_activity[:10]

print(f"10 most active filters: {top_indices}")
print(f"Activity values: {activity_per_filter[top_indices]}")

Let's look at this visually.

In [None]:
# One panel per top filter, showing its feature map for the probe image.
fig, axes = plt.subplots(1, 10, figsize=(15, 8))
for plot_idx, filter_idx in enumerate(top_indices):
    ax = axes[plot_idx]
    feature_map = activations[:, :, filter_idx]

    # Min-max scale active maps to [0, 1] for display. The value-range check also
    # skips constant maps (max == min), which would otherwise divide by zero and
    # fill the panel with NaNs.
    value_range = feature_map.max() - feature_map.min()
    if feature_map.max() > 0 and value_range > 0:
        feature_map = (feature_map - feature_map.min()) / value_range

    ax.imshow(feature_map, cmap="gray")
    ax.set_title(f"Filter {filter_idx}\nActivity: {activity_per_filter[filter_idx]:.2f}")
    ax.axis("off")

plt.suptitle(f"Top Active Feature Maps (Digit {target_digit})")
plt.show()

Since all of them look the same, let's see whether the kernels that produce them are also the same.

In [None]:
# Kernel weights of the layer at index 1 of CNNmodel (its Conv2D layer).
weights = CNNmodel.layers[1].get_weights()[0]

fig, axes = plt.subplots(1, 10, figsize=(12, 4))
for ax, idx in zip(axes, top_indices):
    # Kernel of this filter for input channel 0, min-max scaled to [0, 1] for display.
    kernel = weights[:, :, 0, idx]
    kernel_min, kernel_max = kernel.min(), kernel.max()
    kernel_norm = (kernel - kernel_min) / (kernel_max - kernel_min)

    ax.imshow(kernel_norm, cmap="gray")
    ax.set_title(f"Filter {idx}")
    ax.axis("off")

plt.suptitle("The difference between kernels and active feature maps")
plt.show()

Fashion-MNIST with the same models

In [None]:
(fx_train, fy_train), (fx_test, fy_test) = keras.datasets.fashion_mnist.load_data()

# Scale pixel values to [0, 1], the same preprocessing the digit data gets before
# training. Doing it right after loading means every later fit/predict call on
# fx_train / fx_test sees identically scaled inputs, and re-running this cell
# starts again from the freshly loaded raw data.
fx_train = fx_train / 255.0
fx_test = fx_test / 255.0

print(fx_train.shape)  # (60000, 28, 28)
print(fy_train.shape)  # (60000,)

In [None]:
# Show the first fashion training image with its numeric label, in black & white.
fig, ax = plt.subplots()
ax.imshow(fx_train[0], cmap="gray")
ax.set_title(f"Label: {fy_train[0]}")
ax.axis("off")
plt.show()

In [None]:
# Train the baseline model on the fashion data for 20 epochs.
# NOTE(review): base_model is the same object that was fitted on the digit data
# earlier in the notebook, so this continues from those weights rather than from
# a fresh initialization — confirm that this is intended.
base_history_f = base_model.fit(
    x=fx_train,
    y=fy_train,
    epochs=20,
    validation_data=(fx_test, fy_test),
)

In [None]:
# Accuracy per epoch for the baseline model on the fashion data.
base_f_metrics = base_history_f.history
fig, ax = plt.subplots()
ax.plot(base_f_metrics["accuracy"], label="Training Accuracy")
if "val_accuracy" in base_f_metrics:  # only plotted when validation accuracy was recorded
    ax.plot(base_f_metrics["val_accuracy"], label="Validation Accuracy")
ax.set_title("Model Accuracy Over Epochs (Fashion, Base)")
ax.set_ylabel("Accuracy")
ax.set_xlabel("Epoch")
ax.legend()
plt.show()

In [None]:
# Train the CNN on the fashion data for 20 epochs.
# NOTE(review): CNNmodel is the same object that was fitted on the digit data
# earlier in the notebook, so this continues from those weights rather than from
# a fresh initialization — confirm that this is intended.
CNN_history_f = CNNmodel.fit(
    x=fx_train,
    y=fy_train,
    epochs=20,
    validation_data=(fx_test, fy_test),
)

In [None]:
# Accuracy per epoch for the CNN on the fashion data.
cnn_f_metrics = CNN_history_f.history
fig, ax = plt.subplots()
ax.plot(cnn_f_metrics["accuracy"], label="Training Accuracy")
if "val_accuracy" in cnn_f_metrics:  # only plotted when validation accuracy was recorded
    ax.plot(cnn_f_metrics["val_accuracy"], label="Validation Accuracy")
ax.set_title("Model Accuracy Over Epochs (Fashion, CNN)")
ax.set_ylabel("Accuracy")
ax.set_xlabel("Epoch")
ax.legend()
plt.show()

Pretty good baseline scores, but they fluctuate a lot, which is expected due to variance.

Next I will look at some ambiguous(?) pictures, i.e. test images that the baseline model predicts with a confidence below 0.8.

In [None]:
# Class scores of the baseline model for every fashion test image.
probs = base_model.predict(fx_test)
conf = probs.max(axis=1)            # highest score per image = confidence
pred_labels = probs.argmax(axis=1)  # class index holding that highest score
# "Ambiguous" = the top score stays below 0.8.
ambi = np.flatnonzero(conf < 0.8)
print(f"Ambiguous samples: {len(ambi)}")

These are the classes, each shown with the first training image that carries its label:

In [None]:
# Fashion-MNIST label names, indexed by the integer label (0-9). Labels 5 and 9
# follow the official label table: "Sandal" and "Ankle boot".
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
               "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

# One example per class on a 2x5 grid: the first training image with that label.
fig, axes = plt.subplots(2, 5, figsize=(12, 5))
for label in range(10):
    # First index with this label
    idx = np.where(fy_train == label)[0][0]
    row, col = divmod(label, 5)
    axes[row, col].imshow(fx_train[idx], cmap="gray")
    axes[row, col].set_title(f"{label}: {class_names[label]}")
    axes[row, col].axis("off")
plt.tight_layout()
plt.show()


In [None]:
# Show the first 10 ambiguous test images (by ascending index) on a 2x5 grid,
# each titled with the predicted class name and its confidence.
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
ambi = np.sort(ambi)
for ax, idx in zip(axes.flat, ambi[:10]):
    ax.imshow(fx_test[idx].squeeze(), cmap="gray")
    top_prob = np.max(probs[idx])
    top_class = class_names[pred_labels[idx]]
    ax.set_title(f"Pred: {top_class}, Conf: {top_prob:.2f}")
    ax.axis("off")
fig.tight_layout()
plt.show()

### Extra section for the live coding task
Should be 10 lines of code, will probably make some pretty graphs