# Pipeline March 2020
This notebook was run with Python 3.7.5; it requires the packages imported in the first cell below.


In [1]:
# ---------------------------- IMPORTS ----------------------------

%reload_ext autoreload
%autoreload 2
import argparse, glob, cv2, time, os, sys, shutil

from PIL import Image
from fastai.vision import *
from matplotlib import pyplot as plt
import numpy as np
from scipy import ndimage as ndi
from IPython.display import clear_output
from datetime import datetime

# -------------------------- JUPYTER UTIL --------------------------
def update_progress(progress):
    """Redraw a 20-character text progress bar in the notebook output.

    `progress` is the fraction of work completed. Ints are converted to
    float, any other non-float value is treated as 0, and the result is
    clamped to the range 0..1. The current cell output is cleared (waiting
    for the new output) before the bar is printed.
    """
    width = 20

    # Normalise the input to a number we can scale.
    if isinstance(progress, int):
        fraction = float(progress)
    elif isinstance(progress, float):
        fraction = progress
    else:
        fraction = 0

    # Clamp to the displayable range.
    if fraction < 0:
        fraction = 0
    elif fraction >= 1:
        fraction = 1

    filled = int(round(width * fraction))
    bar = "#" * filled + "-" * (width - filled)

    clear_output(wait = True)
    print("Progress: [{0}] {1:.1f}%".format(bar, fraction * 100))
    
# ----------------------- CHANNEL COMPRESSION -----------------------
def createMultiChannelImage(fpArr):
    """Load several image files and stack them as the channels of one image.

    Each path in `fpArr` is opened with PIL, converted to a float32 tensor
    and divided by 255. The tensor for file `i` is written into channel `i`
    of a zero-initialised (len(fpArr), H, W) tensor, where H and W are taken
    from the first file. The stacked tensor is returned wrapped in `Image`
    (presumably fastai's Image from the star import - PIL's Image module is
    not callable).
    """
    stacked = None
    channel_count = len(fpArr)
    for idx, file_path in enumerate(fpArr):
        plane = pil2tensor(PIL.Image.open(file_path), np.float32).float().div_(255)
        if stacked is None:
            # Allocate lazily: the spatial size is only known after the
            # first file has been read.
            height, width = plane.shape[1], plane.shape[2]
            stacked = torch.zeros((channel_count, height, width))
        stacked[idx, :, :] = plane
    return Image(stacked)

# ----------------------- IMAGE PREPROCESSOR -----------------------
def image_preprocess(in_path):
    """Load an image file and return a blurred grayscale version of it.

    Args:
        in_path: Path of the image file to read.

    Returns:
        The image converted from BGR to grayscale and smoothed with a
        3x3 Gaussian blur.

    Raises:
        OSError: If OpenCV cannot read an image from `in_path`.
    """
    image = cv2.imread(in_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of a cryptic cvtColor error.
        raise OSError("Could not read image: {}".format(in_path))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.GaussianBlur(gray, (3, 3), 0)

# ----------------------- PROCESSING ALGORITHMS -----------------------
# --- OpenCV2
def canny_auto(in_path, sigma=0.33):
    """Canny edge detection with thresholds derived from the median intensity.

    With `v` the median of the preprocessed image, the thresholds are
    lower = max(0, (1 - sigma) * v) and upper = min(255, (1 + sigma) * v),
    both truncated to int. The edge map is written to 'tmp/canny_auto.jpg'
    and that path is returned.
    """
    out_file = 'tmp/canny_auto.jpg'
    blurred = image_preprocess(in_path)
    median = np.median(blurred)
    low_threshold = int(max(0, (1.0 - sigma) * median))
    high_threshold = int(min(255, (1.0 + sigma) * median))
    cv2.imwrite(out_file, cv2.Canny(blurred, low_threshold, high_threshold))
    return out_file
def canny_wide(in_path):
    """Canny edge detection with a wide threshold band (10, 200).

    Reads the image file at `in_path`, writes the edge map to
    'tmp/canny_wide.jpg' and returns that path.
    """
    out_file = 'tmp/canny_wide.jpg'
    blurred = image_preprocess(in_path)
    edges = cv2.Canny(blurred, 10, 200)
    cv2.imwrite(out_file, edges)
    return out_file
def canny_tight(in_path):
    """Canny edge detection with a tight, high threshold band (225, 250).

    Reads the image file at `in_path`, writes the edge map to
    'tmp/canny_tight.jpg' and returns that path.
    """
    out_file = 'tmp/canny_tight.jpg'
    blurred = image_preprocess(in_path)
    edges = cv2.Canny(blurred, 225, 250)
    cv2.imwrite(out_file, edges)
    return out_file
def laplacian(in_path):
    """Apply the OpenCV Laplacian (CV_64F output depth) to an image file.

    Reads the image file at `in_path`, writes the filtered result to
    'tmp/laplacian.jpg' and returns that path.
    """
    out_file = 'tmp/laplacian.jpg'
    blurred = image_preprocess(in_path)
    response = cv2.Laplacian(blurred, cv2.CV_64F)
    cv2.imwrite(out_file, response)
    return out_file
def sobel_x(in_path):
    """Apply a Sobel derivative in x (dx=1, dy=0, ksize=5, CV_64F depth).

    Reads the image file at `in_path`, writes the filtered result to
    'tmp/sobel_x.jpg' and returns that path.
    """
    out_file = 'tmp/sobel_x.jpg'
    blurred = image_preprocess(in_path)
    gradient = cv2.Sobel(blurred, cv2.CV_64F, 1, 0, ksize=5)
    cv2.imwrite(out_file, gradient)
    return out_file
def sobel_y(in_path):
    """Apply a Sobel derivative in y (dx=0, dy=1, ksize=5, CV_64F depth).

    Reads the image file at `in_path`, writes the filtered result to
    'tmp/sobel_y.jpg' and returns that path.
    """
    out_file = 'tmp/sobel_y.jpg'
    blurred = image_preprocess(in_path)
    gradient = cv2.Sobel(blurred, cv2.CV_64F, 0, 1, ksize=5)
    cv2.imwrite(out_file, gradient)
    return out_file

# --- Scipy
def prewitt(in_path):
    """Apply scipy.ndimage's Prewitt filter (default arguments) to an image file.

    Reads the image file at `in_path`, writes the filtered result to
    'tmp/prewitt.jpg' and returns that path.
    """
    out_file = 'tmp/prewitt.jpg'
    blurred = image_preprocess(in_path)
    filtered = ndi.prewitt(blurred)
    cv2.imwrite(out_file, filtered)
    return out_file


# ----------------------- HANDLERS FOR PROCESSING -----------------------
def createSingleInput(path):
    """Combine the Prewitt, Laplacian and tight-Canny outputs of one image.

    The three filters are run on `path` in that order, their output files
    are stacked into a single multi-channel image, and the result is saved
    to 'out/COMP.jpg' and returned.
    """
    layer_files = [prewitt(path), laplacian(path), canny_tight(path)]
    composite = createMultiChannelImage(layer_files)
    composite.save('out/COMP.jpg')
    return composite

def createMultipleInput(path_ls, out_path, settings):
    """Filter every image in `path_ls` and save the results as numbered JPEGs.

    Args:
        path_ls: Sequence of source image file paths.
        out_path: Prefix for the output files; image `i` is saved to
            out_path + str(i) + '.jpg', so a directory must end with a
            path separator.
        settings: Sequence of flags selecting the filters, in this order:
            canny_tight, canny_auto, canny_wide, laplacian, sobel_x,
            sobel_y, prewitt. A truthy entry enables the filter; entries
            beyond the seventh are ignored and missing trailing entries
            count as disabled.

    Returns:
        A list with one saved image per input path. With exactly one filter
        enabled each item is the PIL image of that filter's output; with
        three or more, each item is the multi-channel image built from the
        first three enabled filters.

    Raises:
        ValueError: If there is at least one path and zero or exactly two
            filters are enabled (previously an IndexError part-way through).
    """
    if len(path_ls) == 0:
        return []

    flags = list(settings)[:7]
    enabled_count = sum(1 for flag in flags if flag)
    if enabled_count in (0, 2):
        raise ValueError(
            "settings must enable exactly one filter or at least three; "
            "got {} enabled".format(enabled_count)
        )

    # Order matters: it defines which settings index maps to which filter.
    filters = (canny_tight, canny_auto, canny_wide, laplacian,
               sobel_x, sobel_y, prewitt)
    # Only the first three enabled filters end up in the output image, so
    # there is no point running any further ones.
    chosen = [fn for fn, flag in zip(filters, flags) if flag][:3]

    imgs = []
    for index, src_path in enumerate(path_ls):
        layer_files = [fn(src_path) for fn in chosen]
        if len(layer_files) == 1:
            img = PIL.Image.open(layer_files[0])  # single channel image
        else:
            img = createMultiChannelImage(layer_files)  # multi channel image
        # Saving here also forces the lazily-opened PIL image to be read
        # before the next iteration overwrites the shared tmp/ file.
        img.save(out_path + str(index) + '.jpg')
        imgs.append(img)
    return imgs

def absoluteFilePaths(directory):
    """Return the absolute path of every file below `directory`, recursively.

    Paths are listed in the order os.walk visits them.
    """
    return [
        os.path.abspath(os.path.join(root, name))
        for root, _subdirs, names in os.walk(directory)
        for name in names
    ]
        
def preprocessDatabase(path, settings): # top level folder as input
    """Rebuild the 'out' directory with filtered copies of a labelled dataset.

    Every immediate sub-directory of `path` is treated as a label. For each
    label, its '.jpg' files are run through createMultipleInput and the
    results are written to 'out/<label>/'.

    Args:
        path: Top-level dataset folder containing one sub-folder per label.
        settings: Filter flags, passed through to createMultipleInput.

    Side effects:
        Deletes any existing 'out' directory in the current working
        directory and recreates it; ensures the 'tmp' scratch directory
        used by the filter functions exists.
    """
    # Always start from an empty output tree.
    if os.path.isdir("out"):
        shutil.rmtree("out")
    os.mkdir("out")

    # The filter functions write their intermediate images into tmp/, and
    # cv2.imwrite does not create missing directories.
    os.makedirs("tmp", exist_ok=True)

    with os.scandir(path) as entries:
        labels = [(entry.name, entry.path) for entry in entries if entry.is_dir()]

    for label_name, label_dir in labels:
        sources = [
            os.path.join(label_dir, file_name)
            for file_name in os.listdir(label_dir)
            if file_name.endswith(".jpg")
        ]

        target_dir = os.path.join("out", label_name)
        os.mkdir(target_dir)

        # Joining with "" appends the trailing separator createMultipleInput
        # expects on its output prefix.
        createMultipleInput(sources, os.path.join(target_dir, ""), settings)
        
# --------------------------- CNN MODEL ---------------------------
def CreateModelFromPath(path, folder_stats=True, image_stats=True):
    """Train a ResNet-34 classifier on an image folder and plot diagnostics.

    Args:
        path: Dataset folder with one sub-folder per label.
        folder_stats: If true, print the number of labels and the number of
            files in each label folder.
        image_stats: If true, normalise the data with `imagenet_stats` and
            show a second sample batch.

    Side effects:
        Shows sample batches, trains for 5 one-cycle epochs, then plots the
        learning-rate schedule, the recorder plot and a confusion matrix.
        Nothing is returned.
    """
    size = 224
    bs = 4
    data = ImageDataBunch.from_folder(path,
        ds_tfms=get_transforms(do_flip=True, flip_vert=True),
        valid_pct=0.5,
        size=size,
        bs=bs)

    data.show_batch(rows=3, figsize=(7,6))

    if folder_stats:
        # Only directories are labels; stray files (e.g. .DS_Store) would
        # otherwise be counted and then crash os.listdir below.
        labels = [name for name in os.listdir(path)
                  if os.path.isdir(os.path.join(path, name))]
        print("No. of labels: {}".format(len(labels)))
        print("-----------------")
        for label in labels:
            # os.path.join works whether or not `path` ends with a separator.
            label_dir = os.path.join(path, label)
            print("{}, {} files".format(label, len(os.listdir(label_dir))))

    if image_stats:
        print("\n Data to be run:")
        print("-----------------")
        # NOTE(review): normalisation is only applied when image_stats is
        # true, so this display flag also changes what the model trains on.
        # Kept as-is to preserve existing results - confirm this is intended.
        data.normalize(imagenet_stats)
        data.show_batch(rows=3, figsize=(3,3))

    # Create CNN Learner
    learner = cnn_learner(data, models.resnet34, metrics=[accuracy, error_rate])
    learner.fit_one_cycle(5)
    learner.recorder.plot_lr()
    learner.recorder.plot()

    interp = ClassificationInterpretation.from_learner(learner)
    interp.plot_confusion_matrix(figsize=(3,3))
    
# ------------ QUEUE COMPLETE PIPELINE ---------------
def pipeline(dataset_arr, settings=(True, False, False, True, False, False, True)):
    """Preprocess and train on each dataset folder in turn.

    For every path in `dataset_arr` the dataset is preprocessed into 'out'
    and a model is trained on 'out' with folder and image stats disabled.

    Args:
        dataset_arr: Iterable of top-level dataset folder paths.
        settings: Filter flags forwarded to preprocessDatabase, which
            requires them (the previous call omitted the argument and
            raised TypeError). The default enables canny_tight, laplacian
            and prewitt - the same three filters createSingleInput
            combines. NOTE(review): the default is an assumption about the
            original intent; pass `settings` explicitly to be sure.
    """
    for path in dataset_arr:
        print("processing data... (" + path + ")")
        preprocessDatabase(path, settings)
        print("training model... (" + path + ")")
        CreateModelFromPath("out", False, False)
    

In [2]:
# SOME TESTS
# march 11
# FULL PIPELINE FOR MULTIPLE SETS
# NOTE: the whole script below is wrapped in a triple-quoted string, so none
# of it executes; the notebook only echoes the string as the cell's output.
# It is kept as a record of the March 11 runs: a queued pipeline() call over
# several dataset folders followed by eight training runs, each followed by
# a 180-second sleep.
"""
data_sets = [
    "../training/EC-small/",
    "../training/EC-small/",
    "../training/EC-small/",
    "../training/EC-small/",
    "../training/EC-small/",
    "../training/plant-id/"
]
pipeline(data_sets)

# Testing sequence 
print("Testing Sequence")

print("----- TEST 1: srsd -----")
print("start time: " + str(datetime.now()))
CreateModelFromPath("out", False, False)
time.sleep(180)

print("----- TEST 2: srsd -----")
print("start time: " + str(datetime.now()))
CreateModelFromPath("out", False, False)
time.sleep(180)

print("----- TEST 3: srsd -----")
print("start time: " + str(datetime.now()))
CreateModelFromPath("out", False, False)
time.sleep(180)

print("----- TEST 4: srsd -----")
print("start time: " + str(datetime.now()))
CreateModelFromPath("out", False, False)
time.sleep(180)

print("----- TEST 5: vli -----")
print("start time: " + str(datetime.now()))
CreateModelFromPath("../training/plant-id/", False, False)
time.sleep(180)

print("----- TEST 6: vli -----")
print("start time: " + str(datetime.now()))
CreateModelFromPath("../training/plant-id/", False, False)
time.sleep(180)

print("----- TEST 7: vli -----")
print("start time: " + str(datetime.now()))
CreateModelFromPath("../training/plant-id/", False, False)
time.sleep(180)

print("----- TEST 8: vli -----")
print("start time: " + str(datetime.now()))
CreateModelFromPath("../training/plant-id/", False, False)
time.sleep(180)
"""

'\ndata_sets = [\n    "../training/EC-small/",\n    "../training/EC-small/",\n    "../training/EC-small/",\n    "../training/EC-small/",\n    "../training/EC-small/",\n    "../training/plant-id/"\n]\npipeline(data_sets)\n\n# Testing sequence \nprint("Testing Sequence")\n\nprint("----- TEST 1: srsd -----")\nprint("start time: " + str(datetime.now()))\nCreateModelFromPath("out", False, False)\ntime.sleep(180)\n\nprint("----- TEST 2: srsd -----")\nprint("start time: " + str(datetime.now()))\nCreateModelFromPath("out", False, False)\ntime.sleep(180)\n\nprint("----- TEST 3: srsd -----")\nprint("start time: " + str(datetime.now()))\nCreateModelFromPath("out", False, False)\ntime.sleep(180)\n\nprint("----- TEST 4: srsd -----")\nprint("start time: " + str(datetime.now()))\nCreateModelFromPath("out", False, False)\ntime.sleep(180)\n\nprint("----- TEST 5: vli -----")\nprint("start time: " + str(datetime.now()))\nCreateModelFromPath("../training/plant-id/", False, False)\ntime.sleep(180)\n\nprint

In [None]:
# march 12 tests
# Train one model per edge filter, each filter enabled on its own, then a
# control model on the unprocessed images. Every filtered run is followed by
# a cooldown pause before the next one starts.
DATASET = "../training/plant-id/"
COOLDOWN_SECONDS = 600

# Display names in the same order as the flags in the settings list.
FILTER_LABELS = [
    "Canny Tight",
    "Canny Auto",
    "Canny Wide",
    "Laplacian",
    "Sobel X",
    "Sobel Y",
    "Prewitt",
]

for position, filter_label in enumerate(FILTER_LABELS):
    print("TEST {}: {}".format(position + 1, filter_label))
    print("start time: " + str(datetime.now()))
    # One-hot flag list: only the filter at `position` is enabled.
    algorithm_settings = [slot == position for slot in range(len(FILTER_LABELS))]
    preprocessDatabase(DATASET, algorithm_settings)
    CreateModelFromPath("out", False, False)

    time.sleep(COOLDOWN_SECONDS) # cooldown

# CONTROL
# Numbered 8: the filtered runs above already use 1-7.
print("TEST 8: CONTROL")
print("start time: " + str(datetime.now()))
CreateModelFromPath(DATASET, False, False)

TEST 1: Canny Tight
start time: 2020-03-12 10:16:03.874206


epoch,train_loss,valid_loss,accuracy,error_rate,time
