<a href="https://colab.research.google.com/github/shztodka/AI-ML-course/blob/main/fashion-mnist-classification/fashion_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install dependencies and import packages
First we need to install the libraries we will be using. 

In [None]:
import sys
!{sys.executable} -m pip install numpy tensorflow wandb

import wandb
import matplotlib.pyplot as plt

import sys
!{sys.executable} -m pip install numpy tensorflow
from IPython.display import clear_output
clear_output()
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


### Define helper functions for logging histograms to WandB


In [None]:
def log_bar(x, y, title, x_name="x", y_name="y", keep_order=False):
    """Log a bar chart to WandB.

    Args:
        x: Sequence of bar labels.
        y: Sequence of bar values, paired element-wise with ``x``.
        title: Chart title; also used as the key under which the chart is logged.
        x_name: Column name for the labels.
        y_name: Column name for the values.
        keep_order: If true, every label is prefixed with its position so that
            sorting the labels alphabetically reproduces the given order.
    """
    labels = list(x)
    if keep_order:
        # Zero-pad the position prefix to a common width; without padding
        # "10: ..." would sort before "2: ..." once there are more than ten bars.
        width = len(str(max(len(labels) - 1, 0)))
        labels = [f"{idx:0{width}d}: {label}" for idx, label in enumerate(labels)]
    table = wandb.Table(
        data=[[label, value] for label, value in zip(labels, y)],
        columns=[x_name, y_name],
    )
    wandb.log({title: wandb.plot.bar(table, x_name, y_name, title=title)})


def create_histogram(data, min_value=None, max_value=None, bins=10):
    """Bin ``data`` and return printable bin labels together with the counts.

    Args:
        data: NumPy array of values to bin.
        min_value: Lower edge of the first bin; defaults to ``data.min()``.
        max_value: Upper edge of the last bin; defaults to ``data.max()``.
        bins: Either the number of equal-width bins between ``min_value`` and
            ``max_value``, or an explicit sequence of bin edges (in which case
            ``min_value`` and ``max_value`` are not used).

    Returns:
        A tuple ``(bin_names, numbers)``: a list of "lower-upper" labels and
        the array of counts, one entry per bin.
    """
    if min_value is None:
        min_value = data.min()
    if max_value is None:
        max_value = data.max()

    if isinstance(bins, (int, np.integer)):
        # N bins are delimited by N + 1 edges.
        bin_edges = np.linspace(min_value, max_value, num=bins + 1)
    else:
        bin_edges = bins

    numbers, _ = np.histogram(data, bins=bin_edges)
    bin_names = [f"{lower:.1f}-{upper:.1f}" for lower, upper in zip(bin_edges[:-1], bin_edges[1:])]

    return bin_names, numbers

# DATA

## Downloading the dataset

In [None]:
# Load Fashion-MNIST through Keras and unpack the training and test pairs.
train_split, test_split = keras.datasets.fashion_mnist.load_data()
x_train_raw, y_train_raw = train_split
x_test_raw, y_test_raw = test_split

In [None]:
# Report the array shapes of both splits.
for split_label, split_x, split_y in (
    ('Training data shape : ', x_train_raw, y_train_raw),
    ('Testing data shape : ', x_test_raw, y_test_raw),
):
    print(split_label, split_x.shape, split_y.shape)

In [None]:
# Derive the set of distinct labels from the training targets.
classes = np.unique(y_train_raw)
num_classes = classes.size
print('Number of classes: ', num_classes)
print('Classes : ', classes)

## Visualize the training data

In [None]:
from datetime import datetime

# One timestamp is shared by the names of all WandB runs started from this notebook session.
date_and_time = f"{datetime.now():%Y-%m-%d %H:%M:%S}"

# Start the run that collects the dataset statistics.
wandb_run = wandb.init(project="fashion-mnist-kreas", name=f"data {date_and_time}")

In [None]:
# Human-readable names, indexed by the integer label.
class_names = [
    'T_shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

# Preview a single raw training sample together with its class name.
i = 10
print(f"Sample {i} is number {class_names[y_train_raw[i]]}")
plt.imshow(x_train_raw[i])

In [None]:
# Record the intensity range (over both splits) and the dtype of the raw
# images in the WandB run summary.
min_value, max_value = (
    min(x_train_raw.min(), x_test_raw.min()),
    max(x_train_raw.max(), x_test_raw.max()),
)
wandb_run.summary["raw"] = dict(min=min_value, max=max_value, dtype=str(x_train_raw.dtype))

# Histogram of the raw training pixel intensities, logged as a bar chart.
bin_names, train_hist = create_histogram(data=x_train_raw)
log_bar(
    x=bin_names,
    y=train_hist,
    title="Raw training data",
    x_name="bin",
    y_name="# pixels",
    keep_order=True,
)

## Data Preprocessing

In [None]:
# Normalize the values to the range -1...1 (assumes raw intensities span 0...255).
# Dividing by 127.5 maps 0 to -1 and 255 to exactly 1; dividing by 128 would
# stop short of the upper bound at 0.992.
x_train_norm = x_train_raw.astype('float32') / 127.5 - 1
x_test_norm = x_test_raw.astype('float32') / 127.5 - 1

In [None]:
# Sanity check: display the value range of the normalized training images.
x_train_norm.min(), x_train_norm.max()

In [None]:
# Record the intensity range (over both splits) and the dtype of the
# normalized images in the WandB run summary.
min_value, max_value = (
    min(x_train_norm.min(), x_test_norm.min()),
    max(x_train_norm.max(), x_test_norm.max()),
)
wandb_run.summary["preprocessed"] = dict(min=min_value, max=max_value, dtype=str(x_train_norm.dtype))

# Histogram of the normalized training pixel values, logged as a bar chart.
bin_names, train_hist = create_histogram(data=x_train_norm)
log_bar(
    x=bin_names,
    y=train_hist,
    title="Preprocessed training data",
    x_name="bin",
    y_name="# pixels",
    keep_order=True,
)

In [None]:
# Preview one training sample after normalization.
i = 10
print(f"Sample {i} is number {class_names[y_train_raw[i]]}")
plt.imshow(x_train_norm[i])

# Upload the first ten normalized training images to WandB, each captioned
# with its class name.
image_list = [
    wandb.Image(x_train_norm[idx], caption=f"{class_names[y_train_raw[idx]]}")
    for idx in range(10)
]
wandb.log({"Example training images (preprocessed)": image_list})

## Labels

In [None]:
from collections import Counter

# Log one bar chart per split showing how many images carry each label.
for chart_title, split_labels in (
    ("Labels in training data", y_train_raw),
    ("Labels in test data", y_test_raw),
):
    num_images_per_label = Counter(split_labels)
    sorted_counts = sorted(num_images_per_label.items())
    x = tuple(str(label) for label, _ in sorted_counts)
    num_images = tuple(count for _, count in sorted_counts)
    log_bar(x, num_images, chart_title, x_name="Label", y_name="# images")

In [None]:
# Count the training images per label and note the size of the smallest class.
num_images_per_label = Counter(y_train_raw)
min_number_of_labels = sorted(num_images_per_label.values())[0]

num_images_per_label

In [None]:
# One-hot encode the integer labels of both splits.
num_classes = 10
y_train_bin, y_test_bin = (
    keras.utils.to_categorical(split_labels, num_classes)
    for split_labels in (y_train_raw, y_test_raw)
)
y_train_bin.shape

In [None]:
# Compare one sample's one-hot vector with its integer label.
y_train_bin[1], y_train_raw[1]

In [None]:
# Confirm the dtype of the normalized images.
x_train_norm[0].dtype

In [None]:
# Names used for training and evaluation: normalized images and one-hot labels.
x_train, x_test = x_train_norm, x_test_norm
y_train, y_test = y_train_bin, y_test_bin

In [None]:
# Close the current WandB run before another one is started.
wandb.finish()

## Creating a noisy dataset


In [None]:
# Optionally replace the training images with a noisy copy.
# noise_factor scales standard-normal noise added to every pixel;
# with noise_factor == 0 no noisy dataset is created.
noise_factor = 0.5
noise_seed = 0  # fixed seed so the noisy dataset is identical on every run

if noise_factor == 0:
    print("noise_fator=", noise_factor)
    # Do not keep a noisy dataset left over from an earlier execution of this cell.
    x_train = x_train_norm

else:
    wandb_run = wandb.init(
        project="fashion-mnist-kreas",
        name=f"data-noisy {date_and_time}"
    )
    # Draw the noise directly as float32: float64 noise (the default of
    # np.random.normal) would upcast the whole training set to float64.
    rng = np.random.default_rng(noise_seed)
    noise = rng.standard_normal(size=x_train_norm.shape, dtype=np.float32)
    x_train_noisy = x_train_norm + noise_factor * noise
    # Clip the values of the noisy images to be between -1 and 1
    x_train_noisy = np.clip(x_train_noisy, -1., 1.)
    x_train = x_train_noisy

    # Preview one noisy sample.
    i = 10
    print(f"{class_names[y_train_raw[i]]}")
    plt.imshow(x_train_noisy[i])

    # Upload the first ten noisy training images, captioned with their class names.
    image_list = []
    for i in range(10):
        image = wandb.Image(x_train_noisy[i], caption=f"{class_names[y_train_raw[i]]}")
        image_list.append(image)

    wandb.log({"Examples training images (noisy)": image_list})
    wandb.finish()

# MODEL

In [None]:
def create_cnn(input_shape, output_classes, dropout):
    """Build a small convolutional classifier as a ``keras.Sequential`` model.

    The network consists of two Conv2D -> MaxPooling2D -> Dropout stages
    (32 and 64 filters) followed by Flatten and a softmax Dense layer.

    Args:
        input_shape: Shape of a single input sample, passed to ``keras.Input``.
        output_classes: Number of units of the final softmax layer.
        dropout: Dropout rate of the first stage; the second stage uses
            ``dropout + 0.25``.

    Returns:
        The (uncompiled) ``keras.Sequential`` model.
    """
    stack = [keras.Input(shape=input_shape)]

    # Both convolutional stages share their layout and differ only in the
    # filter count and the dropout rate.
    for filters, rate in ((32, dropout), (64, dropout + 0.25)):
        stack.append(layers.Conv2D(filters, kernel_size=(3, 3), activation="relu"))
        stack.append(layers.MaxPooling2D(pool_size=(2, 2)))
        stack.append(layers.Dropout(rate))

    stack.append(layers.Flatten())
    stack.append(layers.Dense(output_classes, activation="softmax"))

    return keras.Sequential(stack)

In [None]:
# Hyperparameters of the training run.
dropout = 0.25
batch_size = 64
epochs = 20

# Start the WandB run that tracks the training and store the hyperparameters in its config.
wandb_run = wandb.init(
    project="fashion-mnist-kreas",
    name=f"training {date_and_time}",
    config=dict(batch_size=batch_size, noise_faktor=noise_factor, dropout=dropout),
)

In [None]:
# Build the CNN for 28x28 pixel images with a single (gray) color channel
# and ten output classes.
model = create_cnn(input_shape=(28, 28, 1), output_classes=10, dropout=dropout)

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Train the model; the test split is passed as validation data and the
# WandB callback is attached to the training loop.
model_results = model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    callbacks=[wandb.keras.WandbCallback()],
)

In [None]:
# List the names of the metrics recorded during training.
model_results.history.keys()

In [None]:
# Plot the per-epoch training and validation curves recorded by model.fit().
accuracy = model_results.history['accuracy']
val_accuracy = model_results.history['val_accuracy']
loss = model_results.history['loss']
val_loss = model_results.history['val_loss']

# Use a dedicated name for the x-axis values: rebinding `epochs` here would
# replace the integer hyperparameter that the training cell passes to
# model.fit(), breaking a re-run of that cell.
epoch_indices = range(len(accuracy))

plt.plot(epoch_indices, accuracy, 'bo', label='Training accuracy')
plt.plot(epoch_indices, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.figure()
plt.plot(epoch_indices, loss, 'bo', label='Training loss')
plt.plot(epoch_indices, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()


In [None]:
# Run the trained model on the normalized test images.
predictions = model.predict(x_test_norm)



In [None]:
# Inspect the model output for test sample 10.
predictions[10]

In [None]:
# Display the one-hot encoded test labels.
y_test

In [None]:
# Predict on the test images, then reduce both the one-hot ground truth and
# the predicted scores to class ids by taking the argmax per sample.
val_predictions = model.predict(x_test)
ground_truth_class_ids = np.argmax(y_test, axis=1)
top_pred_ids = np.argmax(val_predictions, axis=1)

# Log the confusion matrix.
# The key "conf_mat" is the id of the plot -- keep it unchanged if
# subsequent runs should show up on the same plot.
confusion_chart = wandb.plot.confusion_matrix(
    probs=None,
    preds=top_pred_ids,
    y_true=ground_truth_class_ids,
    class_names=class_names,
)
wandb.log({"conf_mat": confusion_chart})

In [None]:
# Close the current WandB run.
wandb.finish()