In [None]:
import os.path
import tensorflow as tf
import numpy as np

# Show which TensorFlow release this notebook is running against
print(tf.version.VERSION)

In [None]:
if not os.path.isfile('images/images.tgz'):
  !mkdir images
  !wget --no-check-certificate 'https://www.hep.phy.cam.ac.uk/~lwhitehead/neutrino_images_with_nc.tgz' -O images/images.tgz
  !tar -xzf images/images.tgz -C images/

# Work out the number of classes form the directory structure
root_dir = 'images/'
dir_contents = os.listdir(root_dir)
num_classes = sum(os.path.isdir(os.path.join(root_dir, item)) for item in dir_contents)

print('Dataset consists of', num_classes, 'classes')

class_names = ['CC numu', 'CC nue', 'NC']
for c in range(num_classes):
  print('Number of',class_names[c],'images:')
  !ls -1 images/$c/*.png | wc -l

In [None]:
# Number of images handed to the network in one step
batch_size = 32

# The images have actual size 224 x 224; they are loaded at 112 x 112
# (downsampled by a factor of two) in order to reduce the run time
img_height = 112
img_width = 112

# Let's make use of tensorflow dataset objects, which can be created straight
# from a directory of images. The training and validation subsets are read
# from the same directory with the same settings, so those are written once.
split_options = dict(
  validation_split=0.2,
  seed=42,
  color_mode="rgb",
  label_mode="categorical",
  shuffle=True,
  image_size=(img_height, img_width),
  batch_size=batch_size,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
  'images', subset="training", **split_options)

val_ds = tf.keras.utils.image_dataset_from_directory(
  'images', subset="validation", **split_options)

# All three views are stacked in each image. For simplicity keep only the w view
def extract_w_channel(image, label):
    """Reduce a batch of stacked-view images to a single-channel batch.

    Selects index 2 along the fourth axis of `image` and re-adds a trailing
    axis of length one, so the result still has four axes. `label` is
    returned untouched.

    Assumes the w view is stored at channel index 2 of the RGB image - TODO confirm.
    """
    # Integer index picks the channel; tf.newaxis restores the channel dimension
    w_view = image[:, :, :, 2, tf.newaxis]
    return w_view, label

# Swap both datasets for their w-view-only versions
train_ds, val_ds = train_ds.map(extract_w_channel), val_ds.map(extract_w_channel)

# Read the per-image shape from the first training batch by dropping the
# leading batch axis. img_shape stays None if the dataset yields nothing.
img_shape = None
for first_batch, _ in train_ds.take(1):
    img_shape = first_batch.numpy().shape[1:]
    print("Image shape =", img_shape)

In [None]:
# For simplicity the MLP is a keras Sequential model: the image is flattened,
# passed through three dense ReLU layers of shrinking width (each followed by
# 50% dropout), and ends in a softmax with one output per class
mlp_model = tf.keras.Sequential([
  tf.keras.Input(shape=img_shape),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(64, activation='relu'),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(32, activation='relu'),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(num_classes, activation='softmax'),
])
mlp_model.summary()

In [None]:
# Compile the model
learning_rate = 0.001

# Prefer the legacy Adam implementation, as before, but fall back to the
# standard one where the legacy namespace is unavailable: it is missing on
# older TensorFlow releases (AttributeError) and its classes raise
# ImportError when instantiated under Keras 3.
try:
  optimiser = tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate)
except (AttributeError, ImportError):
  optimiser = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Labels are one-hot vectors (label_mode="categorical"), hence categorical cross-entropy
loss_fn = tf.keras.losses.CategoricalCrossentropy()
mlp_model.compile(optimizer=optimiser, loss=loss_fn, metrics=["accuracy"])

In [None]:
# Train the MLP. train_ds is a tf.data.Dataset that already yields batches, so
# batch_size is not passed to fit() (Keras documents that it must not be
# specified for dataset input). The History object is kept so the loss and
# accuracy curves can be inspected afterwards.
mlp_history = mlp_model.fit(train_ds, validation_data=val_ds, epochs=10,
                            verbose=1)

In [None]:
# Small CNN: two strided 5x5 convolutions with 64 filters each, 25% dropout,
# then a softmax layer with one output per class on the flattened feature maps
cnn_model = tf.keras.Sequential([
  tf.keras.Input(shape=img_shape),
  tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), strides=(2, 2), activation='relu'),
  tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), strides=(2, 2), activation='relu'),
  tf.keras.layers.Dropout(0.25),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(num_classes, activation='softmax'),
])
cnn_model.summary()

In [None]:
# Give the CNN its own optimiser instead of reusing the instance that trained
# the MLP: an optimiser carries its step counter and per-variable state, so
# sharing it would start the CNN from the MLP's optimiser state (and newer
# Keras optimisers refuse variables they were not built for). Rebuilding from
# the config keeps the same class and hyper-parameters.
cnn_optimiser = type(optimiser).from_config(optimiser.get_config())
cnn_model.compile(optimizer=cnn_optimiser, loss=loss_fn, metrics=["accuracy"])

In [None]:
# Train the CNN. As train_ds already yields batches, batch_size is not passed
# to fit() (Keras documents that it must not be specified for dataset input).
# The History object is kept so the training curves can be inspected afterwards.
cnn_history = cnn_model.fit(train_ds, validation_data=val_ds, epochs=2,
                            verbose=1)

In [None]:
# Gather the misclassified images from one batch of the validation set.
# Despite its name, each entry of incorrect_indices is a list of
# [image, predicted class index, true class index]
incorrect_indices = []

batch_ds = val_ds.take(1)

for batch_images, batch_labels in batch_ds:
  class_scores = cnn_model.predict(batch_images)

  # Highest-scoring class per image, and the position of the 1 in each one-hot label
  predicted_classes = np.argmax(class_scores, axis=1)
  true_classes = np.argmax(batch_labels, axis=1)

  for i, (predicted, actual) in enumerate(zip(predicted_classes, true_classes)):
    if predicted != actual:
      incorrect_indices.append([batch_images[i], predicted, actual])

print('Number of images that were incorrectly classified =',len(incorrect_indices))

In [None]:
import matplotlib.pyplot as plot

# Now you can modify this part to draw different images from the failures list
# You can change the value of im to look at different failures
im = 0

# Guard against an empty failures list or an index beyond its end, which would
# otherwise raise IndexError. Negative indices still count from the end.
if not -len(incorrect_indices) <= im < len(incorrect_indices):
  print('No incorrectly classified image with index', im,
        '- the failures list has', len(incorrect_indices), 'entries')
else:
  failed_image, predicted, truth = incorrect_indices[im]
  # Limit pixel values to the range [0, 50] before drawing
  image_to_plot = np.clip(failed_image, 0.0, 50.0)
  fig, ax = plot.subplots(1, 1)
  print('Incorrect classification for image',im,
        ': predicted =',predicted,
        'with true =',truth)
  ax.imshow(image_to_plot)