<a href="https://colab.research.google.com/github/p-nookala/advanced-git-lab/blob/main/CapstoneNeuralNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import files from drive

In [None]:
from google.colab import drive
import os
from pathlib import Path
import numpy as np
import cv2
import random
from keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout, Activation
from google.colab.patches import cv2_imshow
from keras.metrics import CategoricalAccuracy
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


Change directory to labeled data

In [None]:
# Make the labeled-data folder the working directory; later cells walk '.' and
# build relative paths from it.
os.chdir("/content/drive/Shareddrives/Capstone/labeled_data")

Import dataset and find dataset mean and stdev


In [None]:
# Side length (in pixels) that every image is resized to.
IMG_SIZE = 256
# Per-image channel means / standard deviations; appended to by
# generate_labeled_images when it is called with isTrain=True.
means = []
stdevs = []
# Paths of the image files that make up the dataset.
images = []
# Names of the immediate sub-directories of the current working directory.
dirs = next(os.walk('.'))[1]

# Generate numeric labels for each unique directory prefix (the text before the
# first "_" in the directory name).
labels = {dir_name.split("_")[0] for dir_name in dirs}
# A set of strings has no stable iteration order between interpreter sessions,
# so ids are assigned from a sorted sequence to keep the mapping reproducible.
# "NOX" is ordered last, which matches the id it had in the recorded run
# (with four classes it stays 3).
label_dict = {
    name: idx
    for idx, name in enumerate(
        sorted(labels, key=lambda name: (name == "NOX", name))
    )
}
print(label_dict)

def _label_name_for(file_path):
  """Return the class name (a key of ``label_dict``) for one image path."""
  # Files whose name starts with "n" are always treated as the "NOX" class.
  if os.path.basename(str(file_path))[0] == "n":
    return "NOX"
  # Walk the containing folders from the nearest one outwards and use the first
  # whose prefix (text before the first "_") is a known label. Unlike splitting
  # the whole parent path string, this also works when the class folder has no
  # underscore in its name or when the path is absolute.
  for folder in reversed(Path(file_path).parent.parts):
    prefix = folder.split("_")[0]
    if prefix in label_dict:
      return prefix
  raise KeyError(f"no known label prefix in the folders of {file_path}")


def generate_labeled_images(img_ary, isTrain=True):
  """Load, resize and label the images listed in ``img_ary``.

  Args:
    img_ary: iterable of image file paths.
    isTrain: when true, the per-channel mean and standard deviation of every
      loaded (resized) image are appended to the module-level ``means`` and
      ``stdevs`` lists.

  Returns:
    A tuple ``(imgs, labels)``: the images resized to IMG_SIZE x IMG_SIZE and
    converted to float32, and the matching integer ids from ``label_dict``.
    Files that cv2 could not read are skipped, so both lists may be shorter
    than ``img_ary``.

  Raises:
    KeyError: if no class label can be derived for a file.
  """
  labels = []
  imgs = []
  for file_path in img_ary:
    image = cv2.imread(str(file_path))
    # Skip anything that did not decode into a non-empty image.
    if image is None or image.size == 0:
      continue
    # Might be worth putting this into a new directory on the drive to reduce training time...
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    # create image, label pairing
    imgs.append(np.float32(image))
    labels.append(label_dict[_label_name_for(file_path)])
    if isTrain:
      # Channel-wise statistics over the two spatial axes of the resized image.
      means.append(np.mean(image, axis=(0, 1)))
      stdevs.append(np.std(image, axis=(0, 1)))

  return imgs, labels

# Collect every file stored under "<class folder>/original".
for class_dir in dirs:
  original_dir = Path(class_dir) / "original"
  # A folder without an "original" sub-folder holds no samples; report it and
  # move on instead of failing inside iterdir().
  if not original_dir.is_dir():
    print("Skipping folder without an 'original' sub-folder: ", class_dir)
    continue
  # iterdir() yields entries in arbitrary order; sorting gives the shuffle below
  # a fixed starting point, so seeding it really does reproduce the split.
  images.extend(sorted(original_dir.iterdir()))

# if we want to make this reproducible
# random.seed(482)
random.shuffle(images)
# 90% of the paths are used for training, the remaining 10% for testing.
split_at = int(.9 * len(images))
train_imgs = images[:split_at]
test_imgs = images[split_at:]

# Only the training call (isTrain defaults to True) contributes to means/stdevs.
x_train, y_train = generate_labeled_images(train_imgs)
x_test, y_test = generate_labeled_images(test_imgs, False)

# Augmentation idea, not applied yet: rotate the images with cv2.rotate
# (ROTATE_90_COUNTERCLOCKWISE, ROTATE_180, ROTATE_90_CLOCKWISE).

print(len(x_train))
# Compute the global mean and std dev
global_mean = np.mean(means, axis=0)
# Averaging the per-image standard deviations is not the dataset standard
# deviation. Every image has the same number of pixels after resizing, so the
# pooled variance is mean(var_i + mean_i^2) - global_mean^2.
per_image_means = np.asarray(means)
per_image_vars = np.square(np.asarray(stdevs))
second_moment = np.mean(per_image_vars + np.square(per_image_means), axis=0)
# Clamp at zero to guard against tiny negative values from float rounding.
global_std = np.sqrt(np.maximum(second_moment - np.square(global_mean), 0.0))

print("global mean: ", global_mean)
print("global stdev: ", global_std)



{'TNT': 0, 'RDX': 1, 'TATP': 2, 'NOX': 3}
910
global mean:  [67.06183286 68.12081083 68.71126293]
global stdev:  [98.20364101 98.00416065 99.89296153]


Now we can normalize the loaded images using the dataset mean and standard deviation

In [None]:
def normalize_image(img, mean, std):
    """Standardize ``img``: subtract ``mean``, then divide by ``std``.

    A small constant (1e-7) is added to ``std`` so that a zero standard
    deviation does not cause a division by zero.
    """
    centered = img - mean
    scale = std + 1e-7
    return centered / scale

# Standardize every training and test image in place with the same global
# statistics.
for split in (x_train, x_test):
  for idx, img in enumerate(split):
    split[idx] = normalize_image(img, global_mean, global_std)

We can train the model now.

In [None]:

# The network input must match the size the images were resized to, so derive
# it from IMG_SIZE instead of repeating the number.
input_shape = (IMG_SIZE, IMG_SIZE, 3)

# Turn the lists of per-image arrays / integer labels into single arrays.
x_train = np.stack(x_train, axis=0)
y_train = np.array(y_train)
# Hold out 10% of the training data for validation during fitting.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)
x_test = np.stack(x_test, axis=0)
y_test = np.array(y_test)
print(y_train)
print(y_test)
model = Sequential([
    # Convolutional layer with 32 filters, a kernel size of 3x3, and ReLU activation
    Conv2D(32, (3, 3), input_shape=input_shape, activation='relu'),
    # Max pooling layer with a pool size of 2x2
    MaxPooling2D(pool_size=(2, 2)),

    # Another convolutional layer, increasing the depth
    Conv2D(64, (3, 3), activation='relu'),
    # Max pooling layer
    MaxPooling2D(pool_size=(2, 2)),

    # Third convolutional block with 128 filters
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten the 3D output to 1D
    Flatten(),
    # Dense layer with 256 units
    Dense(256, activation='relu'),
    # Output layer with one unit per class id in label_dict and softmax activation
    Dense(len(label_dict), activation='softmax')
])

# The labels are integer class ids (not one-hot vectors), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=25)



[3 3 3 3 3 1 3 3 0 1 1 2 3 3 3 3 3 3 3 1 1 3 3 1 3 3 3 3 2 3 3 3 3 1 2 3 1
 3 3 3 3 3 2 0 1 1 0 0 3 1 0 0 3 1 3 3 2 3 1 3 3 1 3 3 3 1 3 0 3 3 3 2 3 3
 3 3 3 1 0 3 0 2 2 3 0 1 3 3 3 3 3 3 3 3 2 3 1 3 3 3 0 2 3 3 3 3 0 1 1 2 3
 3 3 0 2 3 1 0 3 2 3 1 3 3 2 0 0 0 3 2 1 0 3 1 1 0 1 3 1 3 3 3 3 3 3 0 1 0
 3 3 3 2 3 1 2 3 3 3 0 0 3 3 3 2 1 1 3 3 3 0 1 1 1 3 1 3 3 2 3 3 3 1 1 0 3
 3 3 3 3 3 3 0 3 3 3 0 3 2 3 3 1 2 3 0 3 3 1 1 2 3 1 1 0 2 3 3 3 3 3 3 3 0
 0 0 3 2 3 3 3 1 3 0 3 3 3 1 3 3 3 0 3 3 2 0 1 3 3 0 3 3 3 0 3 1 3 3 1 1 3
 3 3 1 1 1 2 3 3 2 3 3 3 1 3 3 3 3 3 3 3 3 3 3 0 3 2 3 0 3 3 3 3 3 1 0 3 3
 3 1 1 1 1 2 0 3 3 3 3 2 3 3 0 1 1 3 3 3 2 3 1 1 3 3 3 3 3 1 3 3 1 3 0 3 3
 1 3 3 3 1 3 3 3 2 3 1 1 3 1 3 3 0 3 3 3 0 0 3 3 0 3 1 3 3 3 3 3 3 3 0 3 3
 3 3 0 3 3 3 1 3 1 2 1 2 1 3 1 3 3 1 1 3 1 2 3 0 3 2 2 1 3 0 3 3 3 0 3 3 3
 3 3 3 3 3 1 1 3 3 3 3 3 1 3 3 3 3 3 1 3 1 3 3 0 0 0 3 3 3 3 3 1 3 1 3 3 3
 1 3 2 3 1 3 1 0 3 3 3 3 0 3 3 1 3 3 0 1 1 3 3 0 3 0 1 3 1 3 3 2 3 3 1 3 3
 1 3 3 2 1 2 3 3 3 1 3 2 

See model predictions on test set

In [None]:
# Class scores for every test image; argmax picks the predicted class id.
val_predictions = model.predict(x_test)
preds = np.argmax(val_predictions, axis=1)
print(preds)
# scikit-learn metrics take (y_true, y_pred) in that order.
print("Classification accuracy: ", accuracy_score(y_test, preds))
print(y_test)

# Collapse the classes into positive (anything but NOX) vs. negative (NOX).
# Look the NOX id up instead of hard-coding it: the id depends on how
# label_dict was built.
nox_id = label_dict["NOX"]
p_n_preds = preds != nox_id
p_n_test = y_test != nox_id

# Fraction of test samples that are truly positive.
print(np.count_nonzero(p_n_test) / len(p_n_test))

print(p_n_preds)
print(p_n_test)

# List every sample whose positive/negative prediction is wrong.
# NOTE(review): test_imgs[i] only corresponds to sample i if no test image was
# skipped while loading (generate_labeled_images skips unreadable files) - confirm.
for i, (truth, guess) in enumerate(zip(p_n_test, p_n_preds)):
  if truth != guess:
    print(i)
    print(test_imgs[i])

print("P/N accuracy: ", accuracy_score(p_n_test, p_n_preds))
print("P/N F1: ", f1_score(p_n_test, p_n_preds))


print("On ", len(p_n_test), " samples.")

[3 3 3 3 2 3 0 3 3 3 1 2 2 3 1 3 3 2 3 2 3 3 3 0 3 3 3 2 3 3 3 3 1 3 3 2 3
 3 3 3 3 3 2 1 2 2 0 3 3 3 0 1 1 3 3 3 3 0 3 3 1 3 3 0 3 3 3 1 3 3 3 0 3 3
 3 3 3 3 3 2 3 1 1 2 3 0 3 3 3 3 3 1 3 3 3 2 3 3 1 1 3 3]
Classification accuracy:  0.9117647058823529
[3 3 3 3 2 3 0 3 3 3 1 2 2 3 1 3 3 2 3 2 3 3 3 0 3 3 2 2 3 3 3 3 1 3 3 2 3
 0 3 3 1 3 2 1 2 2 0 1 3 3 0 0 1 3 3 3 3 0 3 3 1 3 3 0 3 3 3 1 3 3 3 0 3 3
 3 3 3 3 3 2 3 1 3 2 2 0 3 1 3 3 3 1 3 3 3 2 3 2 1 1 3 3]
0.39215686274509803
[False False False False  True False  True False False False  True  True
  True False  True False False  True False  True False False False  True
 False False False  True False False False False  True False False  True
 False False False False False False  True  True  True  True  True False
 False False  True  True  True False False False False  True False False
  True False False  True False False False  True False False False  True
 False False False False False False False  True False  True  True  True
 False  