In [None]:
import pandas as pd
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from keras.utils import np_utils
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten
from keras.models import Sequential
from keras.optimizers import SGD, RMSprop


from skimage.io import imread
from skimage.color import rgb2gray

In [None]:
# Build the data set
# Keep adding more images and classifiers here
# Now: load csv files, Future: Pull from Hermes

# Now:
# load all image files from a directory
# add dataframe with number classifier

# Future:
# Query Hermes API to call all images by certain date
# construct dataframe to be three columns: image, plugin

In [None]:
# Use this function to create an image+classifer 2-column df
def create_im_classifer_df(path, classifer):
    """Load every ``.tif`` image in a directory as grayscale and label it.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive). A trailing path separator is
        optional.
    classifer : object
        Label stored alongside every image loaded from ``path``.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``'images'`` (one single-channel grayscale array per
        file) and ``'classifer'`` (the label repeated on every row). Rows
        follow the sorted file names. When no ``.tif`` file is found the
        frame is empty but still has both columns.

    Raises
    ------
    IOError
        If a ``.tif`` file cannot be decoded by OpenCV.
    """
    # Compare the real extension: a plain "ends with 'tif'" test would also
    # pick up names such as "motif". Sorting makes the row order reproducible,
    # because os.listdir returns names in arbitrary order.
    tif_names = sorted(
        name for name in os.listdir(path)
        if os.path.splitext(name)[1] == '.tif'
    )

    new_images = []
    for name in tif_names:
        # os.path.join works whether or not `path` ends with a separator
        full_path = os.path.join(path, name)
        im = cv2.imread(full_path)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError('Could not read image file: ' + full_path)
        new_images.append(cv2.cvtColor(im, cv2.COLOR_BGR2GRAY))

    # Creates a df, one column is the grayscale images, the other a classifer
    return pd.DataFrame({'images': new_images,
                         'classifer': [classifer] * len(new_images)})

In [None]:
# function to append new image/classifer df to main dataset df
def increase_dataset_df(main_data_set, new_df, col_name):
    """Append ``new_df`` to ``main_data_set`` after zero-padding its images.

    The first image of ``main_data_set`` is the reference. Every image in
    ``new_df[col_name]`` whose shape differs from it is padded with zeros on
    the bottom and right until its rows/columns match the reference.

    Parameters
    ----------
    main_data_set : pandas.DataFrame
        Existing data set with an image column ``col_name`` and a
        ``'classifer'`` column. If it has no rows, the new images are
        appended unchanged.
    new_df : pandas.DataFrame
        Frame to add; must contain ``col_name`` and ``'classifer'``.
    col_name : str
        Name of the column holding the image arrays.

    Returns
    -------
    pandas.DataFrame
        A new frame (inputs are not modified) holding the rows of
        ``main_data_set`` followed by the padded rows of ``new_df``, with a
        fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If a new image has more rows or columns than the reference image;
        only padding is supported, not cropping.
    """
    # Positional lookup so the reference does not depend on index labels
    target_shape = (main_data_set[col_name].iloc[0].shape
                    if len(main_data_set) else None)

    rescaled_im = []
    for image in new_df[col_name]:
        if target_shape is not None and image.shape != target_shape:
            # Rows and columns are both measured against the same reference
            pad_rows = target_shape[0] - image.shape[0]
            pad_cols = target_shape[1] - image.shape[1]
            if pad_rows < 0 or pad_cols < 0:
                raise ValueError(
                    'Cannot pad image of shape %s to smaller reference shape %s'
                    % (image.shape, target_shape))
            # Pad only the bottom/right of the first two axes with zeros;
            # any further axes (e.g. channels) are left untouched.
            pad_width = ([(0, pad_rows), (0, pad_cols)]
                         + [(0, 0)] * (image.ndim - 2))
            image = np.pad(image, pad_width,
                           mode='constant', constant_values=0)
        rescaled_im.append(image)

    # Use col_name (not a hard-coded 'images') so the columns line up with
    # main_data_set; .tolist() avoids aligning on new_df's index labels.
    rescaled_im_df = pd.DataFrame({col_name: rescaled_im,
                                   'classifer': new_df['classifer'].tolist()})

    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    return pd.concat([main_data_set, rescaled_im_df], ignore_index=True)

In [None]:
# builds test/train set from a 2-column image/classifer df
def df_to_test_train_split(df, train_size=.85, random_state=None):
    """Turn an image/label frame into train and test arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``'images'`` column of equally sized 2-D arrays and
        a ``'classifer'`` column of labels.
    train_size : float or int, optional
        Forwarded to ``train_test_split``; defaults to 0.85.
    random_state : int or None, optional
        Forwarded to ``train_test_split``. Pass an integer to get the same
        split on every run; the default ``None`` keeps the split random.

    Returns
    -------
    X_train, X_test, y_train, y_test
        ``X_*`` are float32 arrays of shape ``(n, rows, cols, 1)`` with pixel
        values divided by 255. ``y_*`` are the label-encoded classes passed
        through ``np_utils.to_categorical``.

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    images = df['images']
    n_images = len(images)
    if n_images == 0:
        raise ValueError('df contains no images to split')

    # dim_row & dim_col are dimensions of the images (taken from the first one)
    dim_row, dim_col = images.iloc[0].shape[:2]

    # shape X
    # Allocate the final 4-D float32 tensor directly rather than building a
    # float64 tensor and copying it; iterate by position so a non-default
    # index on df does not matter.
    X = np.empty(shape=(n_images, dim_row, dim_col, 1), dtype='float32')
    for i, image in enumerate(images):
        X[i, :, :, 0] = image
    X /= 255 # scale 0-255 pixel values by 1/255

    # shape y
    le = preprocessing.LabelEncoder() # Create a label (category) encoder object
    y = np.array(le.fit_transform(df['classifer'])) # labels -> integer codes
    y = np_utils.to_categorical(y) # integer codes -> one-hot rows

    # test train X,y
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, random_state=random_state)
    return X_train, X_test, y_train, y_test

In [None]:
### Build the CNN
# NOTE: this cell reads X_train, X_test, y_train and y_test, which are created
# by the df_to_test_train_split(...) cell further down the notebook; that cell
# must have been run before this one.

# Take the layer sizes from the data instead of hard-coding them, so the
# network always matches the images and the number of classes actually loaded.
input_shape = X_train.shape[1:]  # (rows, cols, channels) of one sample
n_classes = y_train.shape[1]     # width of the one-hot label matrix

model = Sequential()

# Design a CNN
model.add(Conv2D(3, (9, 9), padding='valid', input_shape=input_shape))
model.add(Activation('relu'))
model.add(Conv2D(2, (9, 9)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(5))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(n_classes))  # one output unit per class
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Fit your model
model.fit(X_train, y_train, batch_size=2, epochs=1,    #fixed?
          verbose=1, validation_data=(X_test, y_test))

---

In [None]:
# Base folder that holds one sub-folder of .tif images per source.
# Change this single constant to point the notebook at another location.
DATA_DIR = 'C:\\Users\\RiggsSc\\Documents\\LAM\\D_Science\\SIM\\CNN_image_class\\'

# One frame per sub-folder; STI and STI2 both carry the 'STI' label.
PR = create_im_classifer_df(path=DATA_DIR + 'PR\\', classifer='PR')
BSE = create_im_classifer_df(path=DATA_DIR + 'BSE\\', classifer='BSE')
STI = create_im_classifer_df(path=DATA_DIR + 'STI\\', classifer='STI')
STI2 = create_im_classifer_df(path=DATA_DIR + 'STI2\\', classifer='STI')

In [None]:
# Merge the per-folder frames into one data set. STI is the starting frame, so
# its first image is the one the other frames' image shapes are compared with.
df1 = increase_dataset_df(main_data_set=STI, new_df=BSE, col_name='images')
df2 = increase_dataset_df(main_data_set=df1, new_df=PR, col_name='images')
main_data_set_df = increase_dataset_df(main_data_set=df2, new_df=STI2,
                                       col_name='images')

In [None]:
 # Build the train/test arrays from the merged data set.
 # NOTE(review): this line starts with a stray leading space; a plain .py
 # export of the notebook would likely reject it as an unexpected indent.
 X_train, X_test, y_train, y_test = df_to_test_train_split(main_data_set_df)

In [None]:
# Sanity check: expected shape is (n_train_samples, rows, cols, 1)
print(X_train.shape)

In [None]:
# Now and Future:
# Open main data set
# append the new data to the larger data set
# zip and save

In [None]:
# Now:
# function: opens dataset
#           parses df, reshapes X,y data

# Future:
# function: opens dataset
#           converts the 3rd df column (plugin) to a number
#           parses df, reshapes X,y data

In [None]:
# Now and Future:
# Build the test,train
# Build the CNN
# Run model
# Save model