In [37]:
import numpy as np
import os
import PIL
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Flatten, Dense, Dropout
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import BatchNormalization, GlobalAveragePooling2D
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.utils import shuffle
from openpyxl import load_workbook
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

In [38]:
size = 224
def read_image(f, target_size=(size, size)):
    """Load one image file and prepare it as VGG16 input.

    Args:
        f: path of the image file to load.
        target_size: (height, width) the image is resized to on load.
            Defaults to (size, size) instead of a separate hard-coded 224,
            so the module-level ``size`` setting is actually honoured.

    Returns:
        The image as a numpy array after ``preprocess_input`` (the VGG16
        preprocessing imported at the top of the notebook).
    """
    image = load_img(f, target_size=target_size)
    # convert the image pixels to a numpy array
    image = img_to_array(image)
    image = preprocess_input(image)
    return image

In [39]:
# Notebook configuration: image size, data folders and spreadsheet lookup keys.
size = 224
# os.path.join(name, '') appends the platform's own separator, so the folders
# resolve on POSIX as well as on Windows (where the value is unchanged:
# 'TrainData_Augmentation\\').
traindata_folderPath = os.path.join('TrainData_Augmentation', '')
validdata_folderPath = os.path.join('ValidationData_Augmentation', '')
testdata_folderpath = os.path.join('TestingData_Augmentation', '')
# Header text of the spreadsheet column that holds the regression target.
columnName = 'toby.max.od'
excelFileName = "Perron_phenotype-GSU-training.xlsx"
sheetName = 'Total Database'
# Sample image name; not referenced by the cells visible here.
imgPat = 'PIL-55a_3dayLBCR-3'

In [40]:
def _list_jpeg_files(folder):
    """Return the sorted names of the '.jpeg' files directly inside folder.

    os.listdir returns entries in arbitrary order; sorting makes the sample
    order (and therefore the arrays built from it) reproducible between runs.
    """
    return sorted(name for name in os.listdir(folder) if name.endswith('.jpeg'))


train_filelist = _list_jpeg_files(traindata_folderPath)
valid_filelist = _list_jpeg_files(validdata_folderPath)
test_filelist = _list_jpeg_files(testdata_folderpath)

In [41]:
# Report how many image files were found for train, validation and test,
# in that order. n_files ends up holding the test-set count.
for split_files in (train_filelist, valid_filelist, test_filelist):
    n_files = len(split_files)
    print('Total number of files in directory:', n_files)

Total number of files in directory: 1847
Total number of files in directory: 637
Total number of files in directory: 518


In [42]:
# Open the workbook named by excelFileName with openpyxl.
wb = load_workbook(excelFileName)

In [43]:
# Select the worksheet named by sheetName; the column/row lookups read from it.
sheet = wb[sheetName]

In [44]:
def getColumnNumber(columnName, sheet):
    """Find the column whose header cell (row 1) equals columnName.

    Columns are scanned left to right from 1 to sheet.max_column.

    Returns:
        The 1-based index of the first matching column, or -1 if none matches.
    """
    for colIdx in range(1, sheet.max_column + 1):
        if sheet.cell(row=1, column=colIdx).value == columnName:
            return colIdx
    return -1

In [45]:
import re
def getStrainNumber(imageName):
    """Extract the strain identifier from an image file name.

    The identifier is the text between the first '-' and the first '_' that
    follows it, with lowercase letters removed, e.g.
    'PIL-55a_3dayLBCR-3' -> '55'.

    The '_' is searched for only after the dash, so an underscore earlier in
    the name no longer yields an empty result. If no '_' follows, the rest of
    the name is used instead of silently dropping its last character. If the
    name has no '-', the text is taken from the start of the name.

    Returns:
        The extracted identifier as a string (it may be empty).
    """
    strt = imageName.find('-') + 1  # 0 when there is no dash
    end = imageName.find('_', strt)
    if end == -1:
        end = len(imageName)
    return re.sub(r'[a-z]', '', imageName[strt:end])

In [46]:
def getRowNumber(strainNumber, sheet, columnNumber):
    """Find the row whose cell in columnNumber is numerically equal to strainNumber.

    Rows are scanned top to bottom from 1 to sheet.max_row. Cells that cannot
    be converted to float are skipped: text such as a header raises ValueError
    and an empty cell (None) raises TypeError, which was previously uncaught.

    Args:
        strainNumber: strain identifier, convertible to float.
        sheet: worksheet-like object exposing max_row and cell(row=, column=).
        columnNumber: 1-based index of the column holding strain identifiers.

    Returns:
        The 1-based index of the first matching row, or -1 if none matches.

    Raises:
        ValueError: if strainNumber itself cannot be converted to float.
    """
    target = float(strainNumber)

    for rowIdx in range(1, sheet.max_row + 1):
        cellVal = sheet.cell(row=rowIdx, column=columnNumber).value
        try:
            if float(cellVal) == target:
                return rowIdx
        except (TypeError, ValueError):
            # Non-numeric or empty cell: it cannot be the strain we want.
            continue
    return -1

In [47]:
# Locate the target column. Fail fast if the header is missing: with -1 every
# image would be skipped without any message during loading.
colNumber = getColumnNumber(columnName, sheet)
if colNumber < 0:
    raise ValueError('Column %r not found in the header row of the sheet' % (columnName,))

In [48]:
# Show the resolved 1-based index of the target column.
print(colNumber)

18


In [49]:
# Locate the column holding strain identifiers. Fail fast if it is missing,
# because row lookups against column -1 cannot work.
strainColumnNumber = getColumnNumber('strain', sheet)
if strainColumnNumber < 0:
    raise ValueError("Column 'strain' not found in the header row of the sheet")

In [50]:
# Build the training set: one preprocessed image per file, labelled with the
# value of the target column in the spreadsheet row matching the file's strain.
X_Train = []
Y_Train = []
print('Reading train images ...')
for file in train_filelist:
    imagepath = os.path.join(traindata_folderPath, file)
    req_ColNumber = colNumber
    # Pre-set so the error report below can always print them.
    req_rowNumber = None
    req_CellValue = None
    try:
        # Resolve the label first, inside the try, so a malformed file name is
        # reported instead of aborting the loop, and images without a matching
        # row are never decoded.
        req_rowNumber = getRowNumber(getStrainNumber(file), sheet, strainColumnNumber)
        if req_rowNumber > 0 and req_ColNumber > 0:
            req_CellValue = sheet.cell(row=req_rowNumber, column=req_ColNumber).value
            im = read_image(imagepath)
            X_Train.append(im)
            Y_Train.append(req_CellValue)
    except Exception as err:  # not a bare except: Ctrl-C still interrupts
        print("Error")
        print(imagepath)
        print(getStrainNumber(file))
        print(req_ColNumber)
        print(req_rowNumber)
        print(req_CellValue)
        print(repr(err))
        print("Error")
        continue

Reading train images ...


In [51]:
# Build the validation set: one preprocessed image per file, labelled with the
# value of the target column in the spreadsheet row matching the file's strain.
X_Valid = []
Y_Valid = []
print('Reading validation images ...')  # message previously said "train"
for file in valid_filelist:
    imagepath = os.path.join(validdata_folderPath, file)
    req_ColNumber = colNumber
    # Pre-set so the error report below can always print them.
    req_rowNumber = None
    req_CellValue = None
    try:
        # Resolve the label first, inside the try, so a malformed file name is
        # reported instead of aborting the loop, and images without a matching
        # row are never decoded.
        req_rowNumber = getRowNumber(getStrainNumber(file), sheet, strainColumnNumber)
        if req_rowNumber > 0 and req_ColNumber > 0:
            req_CellValue = sheet.cell(row=req_rowNumber, column=req_ColNumber).value
            im = read_image(imagepath)
            X_Valid.append(im)
            Y_Valid.append(req_CellValue)
    except Exception as err:  # not a bare except: Ctrl-C still interrupts
        print("Error")
        print(imagepath)
        print(getStrainNumber(file))
        print(req_ColNumber)
        print(req_rowNumber)
        print(req_CellValue)
        print(repr(err))
        print("Error")
        continue

Reading train images ...


In [52]:
# Build the test set: one preprocessed image per file, labelled with the
# value of the target column in the spreadsheet row matching the file's strain.
X_Test = []
Y_Test = []
print('Reading test images ...')  # message previously said "train"
for file in test_filelist:
    imagepath = os.path.join(testdata_folderpath, file)
    req_ColNumber = colNumber
    # Pre-set so the error report below can always print them.
    req_rowNumber = None
    req_CellValue = None
    try:
        # Resolve the label first, inside the try, so a malformed file name is
        # reported instead of aborting the loop, and images without a matching
        # row are never decoded.
        req_rowNumber = getRowNumber(getStrainNumber(file), sheet, strainColumnNumber)
        if req_rowNumber > 0 and req_ColNumber > 0:
            req_CellValue = sheet.cell(row=req_rowNumber, column=req_ColNumber).value
            im = read_image(imagepath)
            X_Test.append(im)
            Y_Test.append(req_CellValue)
    except Exception as err:  # not a bare except: Ctrl-C still interrupts
        print("Error")
        print(imagepath)
        print(getStrainNumber(file))
        print(req_ColNumber)
        print(req_rowNumber)
        print(req_CellValue)
        print(repr(err))
        print("Error")
        continue

Reading train images ...


In [53]:
# Convert each split from Python lists to numpy arrays. Images are stacked into
# one array per split; labels become a column vector of shape (n, 1). Every
# resulting shape is printed.
X_Train = np.array(X_Train)
Y_Train = np.reshape(np.array(Y_Train), (-1, 1))
print(X_Train.shape, 'X_Train Shape')
print(Y_Train.shape, 'Y_Train Shape')

X_Valid = np.array(X_Valid)
Y_Valid = np.reshape(np.array(Y_Valid), (-1, 1))
print(X_Valid.shape, 'X_Valid Shape')
print(Y_Valid.shape, 'Y_Valid Shape')

X_Test = np.array(X_Test)
Y_Test = np.reshape(np.array(Y_Test), (-1, 1))
print(X_Test.shape, 'X_Test Shape')
print(Y_Test.shape, 'Y_Test Shape')

(1847, 224, 224, 3) X_Train Shape
(1847, 1) Y_Train Shape
(637, 224, 224, 3) X_Valid Shape
(637, 1) Y_Valid Shape
(518, 224, 224, 3) X_Test Shape
(518, 1) Y_Test Shape


In [54]:
# Store Model metrics
# Placeholder only: this name is rebound to the object returned by model.fit
# in the training cell, and plot_chart_to_file reads history.history from it.
history = {}

In [55]:
# Plot Chart
def plot_chart_to_file(best_epoch, best_value,
                       filename='Blog3_Model_Chart_Regression_MSE_dead.png'):
    """Plot training/validation MSE per epoch, mark the best epoch, save a PNG.

    Reads the module-level ``history`` object and uses its
    ``history['mean_squared_error']`` and ``history['val_mean_squared_error']``
    lists, so it must be called after ``history`` has been set by training.

    Args:
        best_epoch: x position (epoch index) of the 'Best Model' marker; also
            shown in the title.
        best_value: y position (metric value) of the 'Best Model' marker.
        filename: path of the image to write. Defaults to the name the
            function always used before this parameter existed.
    """
    fig = plt.figure(dpi=300)
    ax1 = fig.add_subplot(111)

    # Train and validation curves on a single axis.
    ax1.plot(history.history['mean_squared_error'], color='b',
             label='TrainMSE')
    ax1.plot(history.history['val_mean_squared_error'], color='g',
             label='validationMSE')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('MSE')
    ax1.legend(loc='lower left', bbox_to_anchor=(0, -0.30))
    ax1.plot(best_epoch, best_value, 'r+', label='Best Model')
    ax1.set_title('Model - Best Epoch (' + str(best_epoch) + ')')

    # Save BEFORE showing: once plt.show() has displayed the figure, a later
    # plt.savefig() can end up writing a fresh, empty figure.
    fig.savefig(filename, bbox_inches="tight")
    plt.show()

In [56]:

# Get VGG16 architecture from keras.applications
from keras.applications.vgg16 import VGG16,decode_predictions

# Pretrained convolutional base: ImageNet weights, without the classifier top.
trained_model = VGG16(weights='imagenet', include_top=False)

# Regression head appended to the base: global average pooling, then ReLU
# dense layers of 512, 256, 128 and 64 units, then one linear output unit.
x = GlobalAveragePooling2D()(trained_model.output)
for units in (512, 256, 128, 64):
    x = Dense(units, activation='relu')(x)
output = Dense(1, activation='linear')(x)

model = Model(inputs=trained_model.input, outputs=output)


In [57]:
# Mark every layer of the pretrained VGG16 base as non-trainable, leaving the
# layers added on top of it as the only ones not frozen here.
for layer in trained_model.layers:
    layer.trainable=False

In [58]:
# Print each base layer's name with its trainable flag to confirm the freeze.
for layer in trained_model.layers:
    print(layer.name,layer.trainable)

input_2 False
block1_conv1 False
block1_conv2 False
block1_pool False
block2_conv1 False
block2_conv2 False
block2_pool False
block3_conv1 False
block3_conv2 False
block3_conv3 False
block3_pool False
block4_conv1 False
block4_conv2 False
block4_conv3 False
block4_pool False
block5_conv1 False
block5_conv2 False
block5_conv3 False
block5_pool False


In [59]:
# Show the layer-by-layer summary of the combined model (base + added head).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [60]:
#compiling the model
# Early stopping gives up after `patience` epochs without a lower val_loss.
patience = 30

adam = Adam(lr=0.001)
model.compile(
    optimizer=adam,
    loss='mean_squared_error',
    metrics=['MSE'],
)

# Stop training when val_loss has not decreased for `patience` epochs.
earlystop = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=0,
    patience=patience,
    verbose=1,
)

# Write the weights file only when val_loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    'modelweights_Regression_tobymax.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

callbacks = [checkpoint, earlystop]

In [61]:
# Show the model summary again, this time after model.compile().
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [None]:
#fitting the model
# Up to 150 epochs with batch size 16, using (X_Valid, Y_Valid) as validation
# data and the checkpoint/early-stopping callbacks. The returned object is
# bound to `history`, replacing the earlier placeholder dict.
history =model.fit(X_Train,Y_Train,epochs=150,batch_size=16,validation_data=(X_Valid,Y_Valid),callbacks=callbacks)

Train on 1847 samples, validate on 637 samples
Epoch 1/150

Epoch 00001: val_loss improved from inf to 0.27017, saving model to modelweights_Regression_tobymax.hdf5
Epoch 2/150

Epoch 00002: val_loss improved from 0.27017 to 0.26074, saving model to modelweights_Regression_tobymax.hdf5
Epoch 3/150

Epoch 00003: val_loss improved from 0.26074 to 0.21833, saving model to modelweights_Regression_tobymax.hdf5
Epoch 4/150

Epoch 00004: val_loss did not improve from 0.21833
Epoch 5/150

Epoch 00005: val_loss did not improve from 0.21833
Epoch 6/150
 416/1847 [=====>........................] - ETA: 2:39:07 - loss: 0.0044 - mean_squared_error: 0.0044

In [None]:
# Reload Best Model for further usage....
# The path must be the file written by ModelCheckpoint for this run; the
# previous name ('..._dead.hdf5') pointed at a different model's weights.
model.load_weights('modelweights_Regression_tobymax.hdf5')

In [None]:
# Plot Charts
# Take the best epoch from the recorded validation loss rather than from
# earlystop.stopped_epoch - patience: stopped_epoch stays 0 when training runs
# to the last epoch without early stopping, which gave a negative epoch (-30).
val_losses = history.history['val_loss']
best_epoch = int(np.argmin(val_losses))  # first epoch reaching the minimum
plot_chart_to_file(best_epoch, val_losses[best_epoch])

In [None]:
# Evaluate on the training set and print the entry at index 1 of the result,
# labelled with the matching name from model.metrics_names.
scores_Train = model.evaluate(X_Train, Y_Train, verbose=0)
print(model.metrics_names[1], scores_Train[1])

In [None]:
# Evaluate on the validation set and print the entry at index 1 of the result,
# labelled with the matching name from model.metrics_names.
scores_Valid = model.evaluate(X_Valid,Y_Valid, verbose=0)
print(model.metrics_names[1], scores_Valid[1])

In [None]:
# Evaluate on the test set and print the entry at index 1 of the result,
# labelled with the matching name from model.metrics_names.
scores = model.evaluate(X_Test, Y_Test, verbose=0)
print(model.metrics_names[1], scores[1])

In [34]:
# Display the per-epoch metric lists held by the object returned by model.fit.
# NOTE(review): the saved output under this cell does not match the training
# log shown above (different val_loss values) - presumably from an earlier
# run; re-run to refresh.
history.history

{'val_loss': [0.24127808856826094,
  0.18470279028378953,
  0.21906741710563937,
  0.1901948787184254,
  0.20263647578592345,
  0.22703432880804014,
  0.18057858190255086,
  0.20312976777652977,
  0.20022539404590775,
  0.20863510021688403,
  0.20565592568423277,
  0.21055607087223013,
  0.20863751415163279,
  0.21767608675187575,
  0.20545550773440738,
  0.2016458948584364,
  0.19500298623808512,
  0.20349247877382245,
  0.1929977237520333,
  0.1929615129215213,
  0.1927484520162738,
  0.1886529178172935,
  0.1932137028958935,
  0.18843367694236712,
  0.2006499472964596,
  0.19024725495932662,
  0.18974413710435797,
  0.20461328668275694,
  0.18966649300045973,
  0.18232611240343932,
  0.19585739828213417,
  0.19201033317748592,
  0.19245766953008142,
  0.19645537917167416,
  0.18698502542506582,
  0.18712375174679324,
  0.1866464795715803],
 'val_mean_squared_error': [0.24127808856826094,
  0.18470279028378953,
  0.21906741710563937,
  0.1901948787184254,
  0.20263647578592345,
  0.2