In [2]:
from PIL import Image # used for loading images
import numpy as np
import os # used for navigating to image path
import imageio # used for writing images
import re # for matching image file name classes
import random

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt

In [2]:
# source: https://thispointer.com/python-how-to-get-list-of-files-in-directory-and-sub-directories/
def getListOfFiles(dirName):
    # create a list of file and sub directories 
    # names in the given directory 
    listOfFile = os.listdir(dirName)
    allFiles = list()
    # Iterate over all the entries
    for entry in listOfFile:
        # Create full path
        fullPath = os.path.join(dirName, entry)
        # If entry is a directory then get the list of files in this directory 
        if os.path.isdir(fullPath):
            allFiles = allFiles + getListOfFiles(fullPath)
        else:
            allFiles.append(fullPath)
                
    return allFiles

Here, we read in all the image files from the raw image folder ("Pictures for AI") and then sequentially number then into two groups: "Improbable" and "Probable".

In [3]:
raw_image_dir = '../../data/raw/Pictures for AI'
image_file_list = getListOfFiles(raw_image_dir)

In [24]:
for filename in image_file_list:
    if '.JPG' in filename or '.jpg' in filename:        
        pass
    else:
        print(filename)

../../data/raw/Pictures for AI/Site 42/Site 42_Infrastructure.docx
../../data/raw/Pictures for AI/Site 67/Site 47_Infrastructure.docx
../../data/raw/Pictures for AI/Site 41/Site 41_Infrastructure.docx


In [25]:
probable_counter = 1
improbable_counter = 1
tidy_image_dir = '../../data/tidy/labeled_images/'
if not os.path.exists(tidy_image_dir):
    os.makedirs(tidy_image_dir)
for filename in image_file_list:
    if '.JPG' in filename or '.jpg' in filename:        
        if any(re.findall(r'improbable', filename, re.IGNORECASE)):
            save_name = tidy_image_dir + 'improbable' + '-' + str(improbable_counter) + '.jpg'
            improbable_counter += 1
        elif any(re.findall(r'probable|possible', filename, re.IGNORECASE)):
            save_name = tidy_image_dir + 'probable' + '-' + str(probable_counter) + '.jpg'
            probable_counter += 1   
        imageio.imwrite(save_name, np.array(Image.open(filename)))
print('Number of probable images saved:', probable_counter-1)
print('Number of improbable images saved:', improbable_counter-1) 

Number of probable images saved: 103
Number of improbable images saved: 259


In [5]:
def label_img(name):
  word_label = name.split('-')[0]
  if word_label == 'probable' : 
        return np.array([1, 0])
  elif word_label == 'improbable' : 
    return np.array([0, 1])

In [12]:
#IMG_SIZE = 300
DIR = '../../data/tidy/labeled_images'
def load_training_data(l=0,t=400,r=3024,b=3424):
    data = []
    image_list = os.listdir(DIR)
    #training_image_list = random.sample(image_list, int(.9*len(image_list))) # take 90% of image data for traing
    for img in image_list:
        label = label_img(img)
        path = os.path.join(DIR, img)
        img = Image.open(path)
        img = img.convert('L') # convert image to monochrome 
        img = img.crop((l, t, r, b)) 
        img_size_w, img_size_h = img.size
        #print(img_size_w)
        img_size = int(img_size_w/28)        
        img = img.resize((img_size, img_size), Image.ANTIALIAS)
        data.append([np.array(img), label])        
#     # Basic Data Augmentation - Horizontal Flipping
#     flip_img = Image.open(path)
#     flip_img = flip_img.convert('L')
#     flip_img = flip_img.resize((IMG_SIZE, IMG_SIZE), Image.ANTIALIAS)
#     flip_img = np.array(flip_img)
#     flip_img = np.fliplr(flip_img)
#     train_data.append([flip_img, label])  
#    shuffle(train_data)
    return (data)

In [13]:
processed_image_data = load_training_data()
#test_data = load_training_data()[1]
#plt.imshow(train_data[43][0], cmap = 'gist_gray')

In [18]:
def split_data(image_array, prop=0.80):
    random.shuffle(image_array)
    train_size = int(prop*np.shape(image_array)[0])
    train = image_array[:train_size]
    test = image_array[train_size:]
    return(train, test)

In [19]:
train_data, test_data = split_data(processed_image_data)

  return array(a, dtype, copy=False, order=order)


In [35]:
train_data[1][1]

array([0, 1])

In [51]:
model = models.Sequential([
    layers.Conv2D(64, 7, activation="relu", padding="same", input_shape = (108, 108, 1)), #input shape of course needs to be changed
    layers.MaxPooling2D(2),
    layers.Conv2D(128, 3, activation="relu", padding="same"),
    layers.Conv2D(128, 3, activation="relu", padding="same"),
    layers.MaxPooling2D(2),
    layers.Conv2D(256, 3, activation="relu", padding="same"),
    layers.Conv2D(256, 3, activation="relu", padding="same"),
    layers.MaxPooling2D(2),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(10, activation="softmax")
])

# model = Sequential()
# model.add(Conv2D(32, kernel_size = (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)))
# model.add(MaxPooling2D(pool_size=(2,2)))
# model.add(BatchNormalization())model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2,2)))
# model.add(BatchNormalization())model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2,2)))
# model.add(BatchNormalization())model.add(Conv2D(96, kernel_size=(3,3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2,2)))
# model.add(BatchNormalization())model.add(Conv2D(32, kernel_size=(3,3), activation='relu'))
# model.add(MaxPooling2D(pool_size=(2,2)))
# model.add(BatchNormalization())
# model.add(Dropout(0.2))model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# #model.add(Dropout(0.3))
# model.add(Dense(2, activation = 'softmax'))

In [1]:
# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

NameError: name 'model' is not defined

In [54]:
# Train the model.
model.fit(
    np.array([np.expand_dims(x[0],axis=2) for x in train_data]), # images
    np.array([x[1] for x in train_data]), # labels
    #batch_size=50,
    epochs=10,
    validation_data=(np.array([np.expand_dims(x[0],axis=2) for x in test_data]), np.array([x[1] for x in test_data])),
)

Epoch 1/10


ValueError: in user code:

    /usr/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:806 train_function  *
        return step_function(self, iterator)
    /usr/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /usr/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:789 run_step  **
        outputs = model.train_step(data)
    /usr/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:748 train_step
        loss = self.compiled_loss(
    /usr/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /usr/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:149 __call__
        losses = ag_call(y_true, y_pred)
    /usr/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:253 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /usr/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:1535 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /usr/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/lib/python3.8/site-packages/tensorflow/python/keras/backend.py:4687 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /usr/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 2) and (None, 10) are incompatible


In [46]:
np.shape([np.expand_dims(x[0],axis=2) for x in train_data])

(289, 108, 108, 1)

In [None]:
loss, acc = model.evaluate(testImages, testLabels, verbose = 0)
print(acc * 100)