In [73]:
import numpy as np
import os
import PIL
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Flatten, Dense, Dropout
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import BatchNormalization, GlobalAveragePooling2D
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.utils import shuffle
from openpyxl import load_workbook
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

In [74]:
size = 224
def read_image(f, target_size=(size, size)):
    """Load an image file and prepare it as input for the VGG16 base.

    Parameters
    ----------
    f : path of the image file; passed straight to ``load_img``.
    target_size : (height, width) the image is resized to while loading.
        Defaults to ``(size, size)`` = (224, 224), the value that used to be
        hard-coded in the body, so existing one-argument calls are unchanged.

    Returns
    -------
    The array produced by ``img_to_array`` after the VGG16
    ``preprocess_input`` function has been applied to it.
    """
    image = load_img(f, target_size=target_size)
    # convert the image pixels to a numpy array
    image = img_to_array(image)
    image = preprocess_input(image)
    return image

In [75]:
# Notebook configuration: image size, data folders and spreadsheet lookup keys.
size = 224

# os.path.join(name, '') appends the separator of the running platform.
# On Windows this yields exactly the former 'Name\\' strings; on POSIX it
# yields 'Name/', where the hard-coded backslash was part of the directory
# name and made os.listdir fail.
traindata_folderPath = os.path.join('TrainData_Augmentation', '')
validdata_folderPath = os.path.join('ValidationData_Augmentation', '')
testdata_folderpath = os.path.join('TestingData_Augmentation', '')

# Header of the spreadsheet column used as the regression target.
columnName = 'phe'
excelFileName = "Perron_phenotype-GSU-training.xlsx"
sheetName = 'Total Database'
# Sample image name in the pattern that getStrainNumber parses.
imgPat = 'PIL-55a_3dayLBCR-3'

In [76]:
# Collect the '.jpeg' file names of each split.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the sample order (and everything derived from it) reproducible.
train_filelist = sorted(file for file in os.listdir(traindata_folderPath) if file.endswith('.jpeg'))
valid_filelist = sorted(file for file in os.listdir(validdata_folderPath) if file.endswith('.jpeg'))
test_filelist = sorted(file for file in os.listdir(testdata_folderpath) if file.endswith('.jpeg'))

In [77]:
# Report how many image files were found for the train, validation and test
# splits, in that order. n_files ends up holding the test-split count.
for split_files in (train_filelist, valid_filelist, test_filelist):
    n_files = len(split_files)
    print('Total number of files in directory:', n_files)

Total number of files in directory: 1847
Total number of files in directory: 637
Total number of files in directory: 518


In [78]:
# Open the Excel workbook named by excelFileName with openpyxl.
wb = load_workbook(excelFileName)

In [79]:
# Select the worksheet named by sheetName; all later lookups read from it.
sheet = wb[sheetName]

In [80]:
def getColumnNumber(columnName, sheet):
    """Return the 1-based index of the column whose header is ``columnName``.

    The header is read from row 1 of ``sheet``; columns 1 through
    ``sheet.max_column`` are examined left to right and the first match wins.

    Returns -1 when no header cell equals ``columnName``.
    """
    last_column = sheet.max_column
    for col_idx in range(1, last_column + 1):
        if sheet.cell(row=1, column=col_idx).value == columnName:
            return col_idx
    return -1

In [81]:
import re
def getStrainNumber(imageName):
    """Extract the strain number embedded in an image file name.

    The strain token is the text between the first '-' and the next '_';
    lowercase letters are then stripped from it, e.g.
    'PIL-55a_3dayLBCR-3' -> '55'.

    Parameters
    ----------
    imageName : file name (string) to parse.

    Returns
    -------
    The token as a string (it is not converted to a number here). If the
    name has no '-', the token starts at the beginning of the name; if no
    '_' follows, the token runs to the end of the name.
    """
    strt = imageName.find('-')
    # Search for '_' only after the '-', so an earlier underscore cannot
    # produce an empty (reversed) slice.
    end = imageName.find('_', strt + 1)
    if end == -1:
        # find() signals "not found" with -1; used as a slice bound it would
        # silently drop the last character, so take the rest of the name.
        end = len(imageName)
    imgNam = imageName[strt + 1:end]

    return re.sub(r'[a-z]', '', imgNam)

In [82]:
def getRowNumber(strainNumber, sheet, columnNumber):
    """Return the 1-based row whose cell in ``columnNumber`` equals the strain.

    Both the requested strain and each cell value are compared as floats, so
    '55', 55 and 55.0 all match each other. Rows 1 through ``sheet.max_row``
    are scanned top to bottom and the first match wins.

    Parameters
    ----------
    strainNumber : strain identifier, anything ``float()`` accepts.
    sheet : worksheet-like object providing ``max_row`` and
        ``cell(row=..., column=...).value``.
    columnNumber : 1-based index of the column holding strain identifiers.

    Returns
    -------
    The matching row number, or -1 when no row matches or ``strainNumber``
    itself is not numeric.
    """
    try:
        strainNumbr = float(strainNumber)
    except (TypeError, ValueError):
        # An unparseable strain can never match a row; report "not found"
        # through the existing sentinel instead of raising.
        return -1

    for x in range(1, sheet.max_row + 1):
        cellVal = sheet.cell(row=x, column=columnNumber).value
        try:
            if float(cellVal) == strainNumbr:
                return x
        except (TypeError, ValueError):
            # ValueError: non-numeric text such as the header row.
            # TypeError: a value float() cannot take at all, e.g. None from
            # an empty cell, which previously aborted the whole scan.
            continue
    return -1

In [83]:
# Index of the target column (header given by columnName); getColumnNumber
# returns -1 if that header is not present in row 1 of the sheet.
colNumber = getColumnNumber(columnName, sheet)

In [84]:
# Sanity check: a value of -1 here would mean the target column was not found.
print(colNumber)

5


In [85]:
# Index of the 'strain' column, used to match each image to a spreadsheet row
# (-1 if the header is missing).
strainColumnNumber = getColumnNumber('strain', sheet)

In [86]:
# Build the training set: one preprocessed image (X) and its target-column
# value (Y) for every file whose strain number matches a spreadsheet row.
# Files without a matching row are skipped.
X_Train = []
Y_Train = []
print('Reading train images ...')
for file in train_filelist:
    imagepath = os.path.join(traindata_folderPath, file)
    req_ColNumber = colNumber
    # Reset per-file state so the error report below can never hit an unbound
    # name or print a value left over from the previous file.
    req_rowNumber = None
    req_CellValue = None
    try:
        req_rowNumber = getRowNumber(getStrainNumber(file),sheet,strainColumnNumber)
        if(req_rowNumber>0 and req_ColNumber>0):
            req_CellValue = sheet.cell(row=req_rowNumber, column=req_ColNumber).value
            # Decode the image only once a label is known, and before either
            # list is touched, so X_Train and Y_Train always stay aligned.
            im = read_image(imagepath)
            X_Train.append(im)
            Y_Train.append(req_CellValue)
    except Exception as err:
        # `Exception` rather than a bare except, so Ctrl-C (KeyboardInterrupt)
        # still stops the loop. The failing file is reported and skipped.
        print("Error")
        print(imagepath)
        print(getStrainNumber(file))
        print(req_ColNumber)
        print(req_rowNumber)
        print(req_CellValue)
        print(repr(err))
        print("Error")
        continue

Reading train images ...


In [87]:
# Build the validation set: one preprocessed image (X) and its target-column
# value (Y) for every file whose strain number matches a spreadsheet row.
# Files without a matching row are skipped.
X_Valid = []
Y_Valid = []
# The message previously said "train" (copy-paste slip) for this split.
print('Reading validation images ...')
for file in valid_filelist:
    imagepath = os.path.join(validdata_folderPath, file)
    req_ColNumber = colNumber
    # Reset per-file state so the error report below can never hit an unbound
    # name or print a value left over from the previous file.
    req_rowNumber = None
    req_CellValue = None
    try:
        req_rowNumber = getRowNumber(getStrainNumber(file),sheet,strainColumnNumber)
        if(req_rowNumber>0 and req_ColNumber>0):
            req_CellValue = sheet.cell(row=req_rowNumber, column=req_ColNumber).value
            # Decode the image only once a label is known, and before either
            # list is touched, so X_Valid and Y_Valid always stay aligned.
            im = read_image(imagepath)
            X_Valid.append(im)
            Y_Valid.append(req_CellValue)
    except Exception as err:
        # `Exception` rather than a bare except, so Ctrl-C (KeyboardInterrupt)
        # still stops the loop. The failing file is reported and skipped.
        print("Error")
        print(imagepath)
        print(getStrainNumber(file))
        print(req_ColNumber)
        print(req_rowNumber)
        print(req_CellValue)
        print(repr(err))
        print("Error")
        continue

Reading train images ...


In [88]:
# Build the test set: one preprocessed image (X) and its target-column value
# (Y) for every file whose strain number matches a spreadsheet row.
# Files without a matching row are skipped.
X_Test = []
Y_Test = []
# The message previously said "train" (copy-paste slip) for this split.
print('Reading test images ...')
for file in test_filelist:
    imagepath = os.path.join(testdata_folderpath, file)
    req_ColNumber = colNumber
    # Reset per-file state so the error report below can never hit an unbound
    # name or print a value left over from the previous file.
    req_rowNumber = None
    req_CellValue = None
    try:
        req_rowNumber = getRowNumber(getStrainNumber(file),sheet,strainColumnNumber)
        if(req_rowNumber>0 and req_ColNumber>0):
            req_CellValue = sheet.cell(row=req_rowNumber, column=req_ColNumber).value
            # Decode the image only once a label is known, and before either
            # list is touched, so X_Test and Y_Test always stay aligned.
            im = read_image(imagepath)
            X_Test.append(im)
            Y_Test.append(req_CellValue)
    except Exception as err:
        # `Exception` rather than a bare except, so Ctrl-C (KeyboardInterrupt)
        # still stops the loop. The failing file is reported and skipped.
        print("Error")
        print(imagepath)
        print(getStrainNumber(file))
        print(req_ColNumber)
        print(req_rowNumber)
        print(req_CellValue)
        print(repr(err))
        print("Error")
        continue

Reading train images ...


In [89]:
# Turn the per-split Python lists into numpy arrays and print their shapes.
# Each label list becomes a single-column 2-D array via the (-1, 1) reshape.

# --- training split ---
X_Train = np.array(X_Train)
print(X_Train.shape, 'X_Train Shape')
Y_Train = np.reshape(np.array(Y_Train), (-1, 1))
print(Y_Train.shape, 'Y_Train Shape')

# --- validation split ---
X_Valid = np.array(X_Valid)
print(X_Valid.shape, 'X_Valid Shape')
Y_Valid = np.reshape(np.array(Y_Valid), (-1, 1))
print(Y_Valid.shape, 'Y_Valid Shape')

# --- test split ---
X_Test = np.array(X_Test)
print(X_Test.shape, 'X_Test Shape')
Y_Test = np.reshape(np.array(Y_Test), (-1, 1))
print(Y_Test.shape, 'Y_Test Shape')

(1847, 224, 224, 3) X_Train Shape
(1847, 1) Y_Train Shape
(637, 224, 224, 3) X_Valid Shape
(637, 1) Y_Valid Shape
(518, 224, 224, 3) X_Test Shape
(518, 1) Y_Test Shape


In [90]:
# Store Model metrics
# NOTE(review): this empty dict is only a placeholder; the fitting cell later
# rebinds `history` to the value returned by model.fit.
history = {}

In [97]:

# VGG16 comes from keras.applications
from keras.applications.vgg16 import VGG16,decode_predictions

# Convolutional base only (no classifier top), initialised with ImageNet weights
trained_model = VGG16(weights='imagenet', include_top=False)

# Regression head stacked on the base: global average pooling, three ReLU
# dense layers of decreasing width, then a single linear output unit.
x = GlobalAveragePooling2D()(trained_model.output)
for units in (512, 256, 128):
    x = Dense(units, activation='relu')(x)

output = Dense(1, activation='linear')(x)

# Full network: VGG16 input -> regression output
model = Model(inputs=trained_model.input, outputs=output)


In [98]:
# Freeze every layer of the VGG16 base so that only the dense head added on
# top of it is updated during training.
for layer in trained_model.layers:
    layer.trainable=False

In [99]:
# Sanity check: list each base layer with its trainable flag; after the
# freezing loop every flag is expected to be False.
for layer in trained_model.layers:
    print(layer.name,layer.trainable)

input_6 False
block1_conv1 False
block1_conv2 False
block1_pool False
block2_conv1 False
block2_conv2 False
block2_pool False
block3_conv1 False
block3_conv2 False
block3_conv3 False
block3_pool False
block4_conv1 False
block4_conv2 False
block4_conv3 False
block4_pool False
block5_conv1 False
block5_conv2 False
block5_conv3 False
block5_pool False


In [100]:
# Print the layer-by-layer architecture of the combined model (base + head).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [101]:
# Compile for regression: Adam optimizer, mean-squared-error loss, and MSE
# reported again as a metric.
adam = Adam(lr=0.001)
model.compile(optimizer=adam, loss='mean_squared_error', metrics=['MSE'])

# Callbacks, both driven by the validation loss (lower is better):
#   checkpoint - writes the weights file whenever val_loss reaches a new best
#   earlystop  - stops training after `patience` epochs without improvement
patience = 30
checkpoint = ModelCheckpoint(
    'modelweights_Regression_phe.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
earlystop = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=0,
    patience=patience,
    verbose=1,
)
callbacks = [checkpoint, earlystop]

In [102]:
# NOTE(review): second model.summary() call in this notebook; an earlier cell
# already prints the same architecture.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [103]:
#fitting the model
# Train for up to 40 epochs in batches of 16, evaluating on the validation
# split each epoch; `callbacks` holds the checkpoint and early-stopping
# callbacks. The return value replaces the earlier `history = {}` placeholder.
history =model.fit(X_Train,Y_Train,epochs=40 ,batch_size=16,validation_data=(X_Valid,Y_Valid),callbacks=callbacks)

Train on 1847 samples, validate on 637 samples
Epoch 1/40

Epoch 00001: val_loss improved from inf to 0.02859, saving model to modelweights_Regression_phe.hdf5
Epoch 2/40

Epoch 00002: val_loss improved from 0.02859 to 0.02559, saving model to modelweights_Regression_phe.hdf5
Epoch 3/40

Epoch 00003: val_loss improved from 0.02559 to 0.01872, saving model to modelweights_Regression_phe.hdf5
Epoch 4/40

Epoch 00004: val_loss did not improve from 0.01872
Epoch 5/40

Epoch 00005: val_loss improved from 0.01872 to 0.01733, saving model to modelweights_Regression_phe.hdf5
Epoch 6/40

Epoch 00006: val_loss improved from 0.01733 to 0.01481, saving model to modelweights_Regression_phe.hdf5
Epoch 7/40

Epoch 00007: val_loss did not improve from 0.01481
Epoch 8/40

Epoch 00008: val_loss did not improve from 0.01481
Epoch 9/40

Epoch 00009: val_loss improved from 0.01481 to 0.01370, saving model to modelweights_Regression_phe.hdf5
Epoch 10/40

Epoch 00010: val_loss did not improve from 0.01370
Ep

Epoch 33/40

Epoch 00033: val_loss did not improve from 0.00628
Epoch 34/40

Epoch 00034: val_loss did not improve from 0.00628
Epoch 35/40

Epoch 00035: val_loss did not improve from 0.00628
Epoch 36/40

Epoch 00036: val_loss did not improve from 0.00628
Epoch 37/40

Epoch 00037: val_loss improved from 0.00628 to 0.00606, saving model to modelweights_Regression_phe.hdf5
Epoch 38/40

Epoch 00038: val_loss improved from 0.00606 to 0.00556, saving model to modelweights_Regression_phe.hdf5
Epoch 39/40

Epoch 00039: val_loss did not improve from 0.00556
Epoch 40/40

Epoch 00040: val_loss did not improve from 0.00556


In [104]:
# Reload the best weights for further use: this is the file the
# ModelCheckpoint callback (save_best_only=True, monitor='val_loss') wrote
# during training, so the evaluations below use those weights rather than the
# weights left after the final epoch.
model.load_weights('modelweights_Regression_phe.hdf5')

In [105]:
# Evaluate on the training split. Index 1 of the returned scores is the 'MSE'
# metric passed to model.compile; index 0 is the loss.
scores_Train = model.evaluate(X_Train, Y_Train, verbose=0)
print(model.metrics_names[1], scores_Train[1])

mean_squared_error 0.00034870645087192785


In [106]:
# Evaluate on the validation split. Index 1 of the returned scores is the
# 'MSE' metric passed to model.compile; index 0 is the loss.
scores_Valid = model.evaluate(X_Valid,Y_Valid, verbose=0)
print(model.metrics_names[1], scores_Valid[1])

mean_squared_error 0.005561113104244674


In [107]:
# Evaluate on the held-out test split. Index 1 of the returned scores is the
# 'MSE' metric passed to model.compile; index 0 is the loss.
scores = model.evaluate(X_Test, Y_Test, verbose=0)
print(model.metrics_names[1], scores[1])

mean_squared_error 0.011002254193708478
