In [1]:
# Importing necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from bokeh.plotting import figure, output_file, save
from bokeh.models import Range1d, LinearAxis, FactorRange, ColumnDataSource, LabelSet, LinearColorMapper
from bokeh.io import output_notebook
from bokeh.palettes import Greys256

from py_ml_tools.setup import load_label_datasets, setup_cuda, find_available_GPUs
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.transform import transform

import matplotlib.pyplot as plt
import numpy as np

2023-06-29 06:46:24.774079: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-29 06:46:25.350575: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-29 06:46:25.354122: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [1]:
def plot_sample_digits(x_train, y_train):
    """Save a 2x5 image grid with the first training example of each digit.

    Args:
        x_train: Object exposing ``.numpy()`` that yields the image array;
            each selected row is reshaped to 28x28 for display.
        y_train: Object exposing ``.numpy()`` that yields one-hot labels;
            they are decoded with ``argmax`` along axis 1.

    The grid is written to ``mnist_examples.png``. An ``IndexError``
    propagates if any digit from 0 to 9 has no example in ``y_train``.
    """
    # Decode the one-hot rows into integer class labels.
    labels = np.argmax(y_train.numpy(), axis=1)
    images = x_train.numpy()

    # For every digit, pick the first sample carrying that label.
    first_examples = {
        digit: images[np.argwhere(labels == digit)[0][0]].reshape(28, 28)
        for digit in range(10)
    }

    # Two rows of five panels: digits 0-4 on top, 5-9 below.
    _figure, grid = plt.subplots(2, 5, figsize=(10, 4))

    for digit, image in first_examples.items():
        row, col = divmod(digit, 5)
        panel = grid[row, col]
        panel.imshow(image, cmap='gray', aspect='auto')
        panel.axis('off')  # hide the axes ticks

    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.savefig("mnist_examples.png")

In [None]:
# Plot loss and accuracy over epochs
def plot_metrics(loss_per_epoch, accuracy_per_epoch):
    """Save a dual-axis Bokeh line chart of loss and accuracy per epoch.

    Args:
        loss_per_epoch: Sequence of loss values, one per epoch. Drawn in red
            against the left axis, which runs from 0 to the maximum loss
            plus 5% padding.
        accuracy_per_epoch: Sequence of accuracy values, one per epoch. Each
            value is multiplied by 100 and drawn in blue against a right
            axis fixed to 0-100 (presumably the inputs are fractions in
            [0, 1] -- confirm against the caller).

    The chart is written to ``metrics_plot.html``. Epochs on the x axis are
    numbered from 0.
    """
    # Left axis: loss, with 5% headroom above the largest value.
    peak_loss = max(loss_per_epoch)
    loss_axis_top = peak_loss + 0.05 * peak_loss

    plot = figure(title="Metrics over epochs", x_axis_label='Epochs', y_axis_label='Loss')
    plot.y_range = Range1d(0, loss_axis_top)
    plot.line(
        np.arange(len(loss_per_epoch)),
        loss_per_epoch,
        legend_label="Loss",
        line_color="red",
    )

    # Right axis: a second, independent range dedicated to accuracy.
    plot.extra_y_ranges = {"AccuracyRange": Range1d(start=0, end=100)}
    accuracy_axis = LinearAxis(y_range_name="AccuracyRange", axis_label="Accuracy (%)")
    plot.add_layout(accuracy_axis, 'right')

    # Rescale accuracy to percent so it matches the right-hand axis.
    accuracy_percent = []
    for value in accuracy_per_epoch:
        accuracy_percent.append(value * 100)

    plot.line(
        np.arange(len(accuracy_percent)),
        accuracy_percent,
        legend_label="Accuracy",
        line_color="blue",
        y_range_name="AccuracyRange",
    )

    # Write the chart to a standalone HTML file.
    output_file("metrics_plot.html")
    save(plot)

In [None]:
def plot_model_output(model, output_digit, x_data, y_data):
    """Plot the model's output for the first sample labelled ``output_digit``.

    The first row of ``x_data`` whose one-hot label in ``y_data`` decodes to
    ``output_digit`` is reshaped to a single-row batch and passed through
    ``model``. The resulting values are drawn as a horizontal bar chart
    (one bar per digit, 0 at the top) and saved to
    ``output_plot_for_{output_digit}.html``.

    Args:
        model: Callable taking a ``(1, n_features)`` tensor and returning an
            object with ``.numpy()``; row 0 of the result is plotted and is
            expected to hold one value per digit 0-9.
        output_digit: Digit whose first occurrence should be visualised.
        x_data: Indexable collection of input samples.
        y_data: One-hot labels, decoded with ``argmax`` along axis 1.

    Raises:
        IndexError: If no sample in ``y_data`` is labelled ``output_digit``.
    """
    # Convert one-hot encoded labels back to integer class labels.
    y_labels = np.argmax(y_data, axis=1)

    # Find the first sample labelled with the requested digit.
    matches = np.argwhere(y_labels == output_digit)
    if matches.size == 0:
        # Same exception type the bare [0][0] lookup would raise, but with a
        # message that says what is actually missing.
        raise IndexError(f"no sample labelled {output_digit!r} found in y_data")
    sample_index = matches[0][0]

    # Reshape to a single-row batch to match the model input shape.
    sample_input = tf.reshape(x_data[sample_index], (1, -1))

    # Calculate model output for that one sample.
    output = model(sample_input).numpy()[0]

    # Prepare data for the bar chart; both lists are reversed so that digit 0
    # ends up at the top of the categorical y axis.
    digits = [str(i) for i in range(10)][::-1]
    output_values = output.tolist()[::-1]

    # Label text: scientific notation with two digits after the decimal point.
    formatted_output_values = [f"{val:.2E}" for val in output_values]

    # Create a new figure with a white background. The title names the digit
    # that was actually requested rather than always claiming digit 0.
    p = figure(y_range=FactorRange(factors=digits),
               width=500,
               title=f"Model Output for First Instance of Digit {output_digit}",
               y_axis_label='Digits',
               x_axis_label='Model Output (~Probability)',
               background_fill_color="white")

    # Create a ColumnDataSource with the data.
    source = ColumnDataSource(data=dict(digits=digits, output_values=output_values, formatted_output_values=formatted_output_values))

    # Add a horizontal bar chart to the figure with black bars.
    p.hbar(y='digits', right='output_values', height=0.5, color="black", source=source)

    # Colour each label by its value: the two-colour palette maps values near
    # the minimum to black and values near the maximum to white.
    # NOTE(review): labels start 5px to the right of the bar end, i.e. over
    # the white background, so white-mapped labels may be unreadable --
    # confirm this is intended.
    color_mapper = LinearColorMapper(palette=['black', 'white'], low=min(output_values), high=max(output_values))
    labels = LabelSet(x='output_values', y='digits', text='formatted_output_values', level='annotation',
                      x_offset=5, y_offset=-10, text_align='left', text_color=transform('output_values', color_mapper), source=source)

    # Echo the label text to the cell output.
    print(formatted_output_values)

    # Add labels to the plot.
    p.add_layout(labels)

    p.xaxis.axis_label_text_font_size = "20pt"
    p.yaxis.axis_label_text_font_size = "20pt"
    p.xaxis.major_label_text_font_size = "15pt"
    p.yaxis.major_label_text_font_size = "15pt"

    # Remove the grid.
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    # Save the plot to a file named after the requested digit.
    output_file(f"output_plot_for_{output_digit}.html")
    save(p)

In [None]:
# Step 1: Load and prepare the MNIST dataset.
def load_data():
    """Load MNIST and return it ready for a dense 10-way classifier.

    Images are flattened to 784 features, divided by 255.0 and cast to
    ``tf.float32``. Labels are converted to one-hot vectors of depth 10,
    one position per digit 0-9.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` of tensors.
    """
    # Keras already provides the train/test split.
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    def _flatten_and_scale(images):
        # One row of 784 pixels per image, scaled by 255.0, stored as float32.
        return tf.cast(images.reshape(-1, 784) / 255.0, tf.float32)

    def _one_hot(labels):
        # Ten classes, matching the ten output neurons of the network.
        return tf.one_hot(labels, depth=10)

    x_train = _flatten_and_scale(train_images)
    x_test = _flatten_and_scale(test_images)
    y_train = _one_hot(train_labels)
    y_test = _one_hot(test_labels)

    return x_train, y_train, x_test, y_test