In [1]:
import logging
import os
import warnings
import time
# import matplotlib.pyplot as plt
# import matplotlib.style as style
import numpy as np
import pandas as pd
# import seaborn as sns
import tensorflow as tf
import tensorflow_hub as hub
import json
from scipy.stats import spearmanr
from datetime import datetime
from keras.preprocessing import image
from PIL import Image
# from sklearn.preprocessing import MultiLabelBinarizer
# from sklearn.model_selection import train_test_split
# from sklearn.calibration import calibration_curve
from tensorflow.keras import layers
from tensorflow.keras import models
from keras import backend as K
import sys
import pickle


In [2]:
df_train = pd.read_csv('data_csv_files/train_mos.csv')
df_val = pd.read_csv('data_csv_files/val_mos.csv')
df_test = pd.read_csv('data_csv_files/test_mos.csv')

X_train = df_train['image'].tolist()
X_val = df_val['image'].tolist()
X_test = df_test['image'].tolist()

train_y_qual = df_train['qual_mos'].tolist()
val_y_qual = df_val['qual_mos'].tolist()
test_y_qual = df_test['qual_mos'].tolist()

In [3]:
X_train = ['../../vizwiz/train/'+f+'.jpg' for f in X_train]
X_val = ['../../vizwiz/val/'+f+'.jpg' for f in X_val]
X_test = ['../../vizwiz/test/'+f+'.jpg' for f in X_test]

In [91]:
IMG_SIZE = 448 # Specify height and width of image to match the input format of the model
CHANNELS = 3 # Keep RGB color channels to match the input format of the model

BATCH_SIZE = 32 # Big enough to measure an F1-score
AUTOTUNE = tf.data.experimental.AUTOTUNE # Adapt preprocessing and prefetching dynamically
SHUFFLE_BUFFER_SIZE = 64 # Shuffle the training data by a chunck of 1024 observations

In [98]:
def parse_function(filename, label):
    # Read an image from a file
    image_string = tf.io.read_file(filename)
    # Decode it into a dense vector
    image_decoded = tf.image.decode_jpeg(image_string, channels=CHANNELS)
    # Resize it to fixed shape
    image_resized = tf.image.resize(image_decoded, [IMG_SIZE, IMG_SIZE])
    # Normalize it from [0, 255] to [0.0, 1.0]
    image_normalized = image_resized / 255.0
    return image_normalized, label

In [99]:
def create_dataset(filenames, rois, label_1, label_2, is_training):
    """Load and parse dataset.
    Args:
        filenames: list of image paths
        labels: numpy array of shape (BATCH_SIZE, N_LABELS)
        is_training: boolean to indicate training mode
    """

    # Create a first dataset of file paths and labels
    dataset = tf.data.Dataset.from_tensor_slices(({'input_layer':filenames,'input_rois':rois}, {'output_1':label_1,'output_2':label_2}))
    # Parse and preprocess observations in parallel
    dataset = dataset.map(parse_function, num_parallel_calls=AUTOTUNE)

    if is_training == True:
        # This is a small dataset, only load it once, and keep it in memory.
        dataset = dataset.cache()
        # Shuffle the data each buffer size
        dataset = dataset.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)

    # Batch the data for multiple steps
    dataset = dataset.batch(BATCH_SIZE)
    # Fetch batches in the background while the model is training.
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)

    return dataset


In [8]:
train_ds = create_dataset(X_train, train_y_qual, is_training=True)
val_ds = create_dataset(X_val, val_y_qual, is_training=False)
test_ds = create_dataset(X_test, test_y_qual, is_training=False)

In [100]:
base_model = tf.keras.applications.ResNet50V2(input_shape=(IMG_SIZE,IMG_SIZE,CHANNELS),
                                               include_top=False,
                                               weights='imagenet')

base_model.trainable = False

In [10]:
def srcc(y_true, y_pred):
     return ( tf.py_function(spearmanr, [tf.cast(y_pred, tf.float32),
                       tf.cast(y_true, tf.float32)], Tout = tf.float32) )

In [11]:
LR = 1e-5 # Keep it small when transfer learning
EPOCHS = 10

In [107]:
from tensorflow.python.keras.layers import Layer
import keras.backend as K

if K.backend() == 'tensorflow':
    import tensorflow as tf

class RoiPoolingConv(Layer):
    def __init__(self, pool_size, num_rois, **kwargs):


        self.pool_size = 2
        self.num_rois = 1

        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        
        self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):
        assert(len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)


        #for roi_idx in range(self.num_rois):

        x = rois[0, 0]
        y = rois[0, 1]
        w = rois[0, 2]
        h = rois[0, 3]

        num_pool_regions = self.pool_size

        x = K.cast(x, 'int32')
        y = K.cast(y, 'int32')
        w = K.cast(w, 'int32')
        h = K.cast(h, 'int32')

        rs = tf.image.resize(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))

        #final_output = K.reshape(rs, (1, self.pool_size, self.pool_size, self.nb_channels))
        final_output = rs
        return final_output
    
    
    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [120]:
def build_model():

    #input_shape = (1,7,7,512)

    pooling_regions = 2
    
    input_rois = layers.Input(shape=(4), name='input_rois')
    input_layer = layers.Input(shape=(IMG_SIZE,IMG_SIZE,CHANNELS), name='input_layer')
    
    # out_roi_pool.shape = (1, num_rois, channels, pool_size, pool_size)
    # num_rois (4) 7x7 roi pooling
    base_layers = base_model(input_layer, training=False)
    out_roi_pool = RoiPoolingConv(pooling_regions, 1)([base_layers, input_rois])

    # Flatten the convlutional layer and connected to 2 FC and 2 dropout
    x = layers.TimeDistributed(layers.Flatten(name='flatten'))(out_roi_pool)
    x = layers.TimeDistributed(layers.Dense(512, activation='relu', name='fc1'))(x)
    x = layers.TimeDistributed(layers.Dropout(0.2))(x)
    x = layers.TimeDistributed(layers.Dense(32, activation='relu', name='fc2'))(x)
    x = layers.TimeDistributed(layers.Dropout(0.2))(x)
    output_1 = layers.Dense(7, name='output_1')(x)

#     x = layers.Flatten(name='flatten')(out_roi_pool)
#     x = layers.Dense(512, activation='relu', name='fc1')(x)
#     x = layers.Dropout(0.2)(x)
#     x = layers.Dense(32, activation='relu', name='fc2')(x)
#     x = layers.Dropout(0.2)(x)
#     output_1 = layers.Dense(1, name='output_1')(x)
#     output_2 = layers.Dense(1, name='output_2')(x)

    model = models.Model(inputs=[input_layer, input_rois], outputs=[output_1])
    
    return model

In [123]:
a = [[1,2];[3,2]]

SyntaxError: invalid syntax (<ipython-input-123-cd807df3d41c>, line 1)

In [121]:
model=build_model()

In [122]:
model.summary()

Model: "model_12"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_rois (InputLayer)         [(None, 4)]          0                                            
__________________________________________________________________________________________________
tf.__operators__.getitem_107 (S ()                   0           input_rois[0][0]                 
__________________________________________________________________________________________________
tf.__operators__.getitem_109 (S ()                   0           input_rois[0][0]                 
__________________________________________________________________________________________________
tf.__operators__.getitem_106 (S ()                   0           input_rois[0][0]                 
___________________________________________________________________________________________

In [114]:
############### XceptionNet
import logging
import os
import warnings
import time
# import matplotlib.pyplot as plt
# import matplotlib.style as style
import numpy as np
import pandas as pd
# import seaborn as sns
import tensorflow as tf
import tensorflow_hub as hub
import json
from scipy.stats import spearmanr
from datetime import datetime
from keras.preprocessing import image
from PIL import Image
# from sklearn.preprocessing import MultiLabelBinarizer
# from sklearn.model_selection import train_test_split
# from sklearn.calibration import calibration_curve
from tensorflow.keras import layers
from tensorflow.keras import models
from keras import backend as K
from tensorflow.python.keras.layers import Layer
import sys
import pickle
import ast

model_name = 'test_roipool'

warnings.filterwarnings('ignore')
logging.getLogger("tensorflow").setLevel(logging.ERROR)

# with open('quality.json') as json_file:
#     data = json.load(json_file)


# X_train = data['train']['image']
# X_val = data['val']['image']

df_Tr = pd.read_csv('master_data_train.csv')
df_Va = pd.read_csv('master_data_val.csv')
df_Ts = pd.read_csv('master_data_test.csv')

if type == 'rand':
    df_train = df_Tr.loc[df_Tr['type'] == 'rand']
    df_val = df_Va.loc[df_Va['type'] == 'rand']
    df_test = df_Ts.loc[df_Ts['type'] == 'rand']
elif type == 'sal':
    df_train = df_Tr.loc[df_Tr['type'] == 'sal']
    df_val = df_Va.loc[df_Va['type'] == 'sal']
    df_test = df_Ts.loc[df_Ts['type'] == 'sal']
else:
    df_train = df_Tr
    df_val = df_Va
    df_test = df_Ts

df_train = df_train.reset_index(drop=True)
df_val= df_val.reset_index(drop=True)
df_test = df_test.reset_index(drop=True)

X_train = df_train['image'].tolist()
X_val = df_val['image'].tolist()
X_test = df_test['image'].tolist()

train_img_qual = df_train['img_qual'].tolist()
val_img_qual = df_val['img_qual'].tolist()
test_img_qual = df_test['img_qual'].tolist()

train_patch_qual = df_train['patch_qual'].tolist()
val_patch_qual = df_val['patch_qual'].tolist()
test_patch_qual = df_test['patch_qual'].tolist()

X_train = ['../../vizwiz/train/'+f+'.jpg' for f in X_train]
X_val = ['../../vizwiz/val/'+f+'.jpg' for f in X_val]
X_test = ['../../vizwiz/test/'+f+'.jpg' for f in X_test]

df_train['rescaled_448_coord'] = df_train['rescaled_448_coord'].apply(ast.literal_eval)
X_roi_train = []
for i in range(len(X_train)):
  C = df_train['rescaled_448_coord'][i]
  y = [int(c) for c in C]
  X_roi_train.append(y)

df_val['rescaled_448_coord'] = df_val['rescaled_448_coord'].apply(ast.literal_eval)
X_roi_val = []
for i in range(len(X_val)):
  C = df_val['rescaled_448_coord'][i]
  y = [int(c) for c in C]
  X_roi_val.append(y)

df_test['rescaled_448_coord'] = df_test['rescaled_448_coord'].apply(ast.literal_eval)
X_roi_test = []
for i in range(len(X_test)):
  C = df_test['rescaled_448_coord'][i]
  y = [int(c) for c in C]
  X_roi_test.append(y)


IMG_SIZE = 448 # Specify height and width of image to match the input format of the model
CHANNELS = 3 # Keep RGB color channels to match the input format of the model

def parse_function(input, label):
    # Read an image from a file
    filename = input['input_layer']
    image_string = tf.io.read_file(filename)
    # Decode it into a dense vector
    image_decoded = tf.image.decode_jpeg(image_string, channels=CHANNELS)
    # Resize it to fixed shape
    image_resized = tf.image.resize(image_decoded, [IMG_SIZE, IMG_SIZE])
    # Normalize it from [0, 255] to [0.0, 1.0]
    image_normalized = image_resized / 255.0

    input['input_layer'] = image_normalized
    return input, label


BATCH_SIZE = 64 # Big enough to measure an F1-score
AUTOTUNE = tf.data.experimental.AUTOTUNE # Adapt preprocessing and prefetching dynamically
SHUFFLE_BUFFER_SIZE = 128 # Shuffle the training data by a chunck of 1024 observations

def create_dataset(filenames, rois, label_1, label_2, is_training):
    """Load and parse dataset.
    Args:
        filenames: list of image paths
        labels: numpy array of shape (BATCH_SIZE, N_LABELS)
        is_training: boolean to indicate training mode
    """

    # Create a first dataset of file paths and labels
    dataset = tf.data.Dataset.from_tensor_slices(({'input_layer':filenames,'input_rois':rois}, {'output_1':label_1,'output_2':label_2}))

    # Parse and preprocess observations in parallel
    dataset = dataset.map(parse_function, num_parallel_calls=AUTOTUNE)

    if is_training == True:
        # This is a small dataset, only load it once, and keep it in memory.
        dataset = dataset.take(SHUFFLE_BUFFER_SIZE).cache()
        # Shuffle the data each buffer size
        dataset = dataset.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)

    # Batch the data for multiple steps
    dataset = dataset.batch(BATCH_SIZE)
    # Fetch batches in the background while the model is training.
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)

    return dataset

train_ds = create_dataset(X_train, X_roi_train, train_img_qual, train_patch_qual, is_training=True)
val_ds = create_dataset(X_val, X_roi_val, val_img_qual, val_patch_qual, is_training=False)
test_ds = create_dataset(X_test, X_roi_test, test_img_qual, test_patch_qual, is_training=False)

# print(X_val)

# feature_extractor_url = "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4"
# feature_extractor_url = "https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4"
# feature_extractor_layer = hub.KerasLayer(feature_extractor_url, input_shape=(IMG_SIZE,IMG_SIZE,CHANNELS))

base_model = tf.keras.applications.ResNet50V2(input_shape=(IMG_SIZE,IMG_SIZE,CHANNELS),
                                               include_top=False,
                                               weights='imagenet')
# # Fine-tune from this layer onwards
# fine_tune_at = 125
#
# # Freeze all the layers before the `fine_tune_at` layer
# for layer in base_model.layers[:fine_tune_at]:
#   layer.trainable =  False

base_model.trainable = False

# feature_extractor_layer.trainable = False

@tf.function
def macro_f1(y, y_hat, thresh=0.5):
    """Compute the macro F1-score on a batch of observations (average F1 across labels)

    Args:
        y (int32 Tensor): labels array of shape (BATCH_SIZE, N_LABELS)
        y_hat (float32 Tensor): probability matrix from forward propagation of shape (BATCH_SIZE, N_LABELS)
        thresh: probability value above which we predict positive

    Returns:
        macro_f1 (scalar Tensor): value of macro F1 for the batch
    """
    y_pred = tf.cast(tf.greater(y_hat, thresh), tf.float32)
    tp = tf.cast(tf.math.count_nonzero(y_pred * y, axis=0), tf.float32)
    fp = tf.cast(tf.math.count_nonzero(y_pred * (1 - y), axis=0), tf.float32)
    fn = tf.cast(tf.math.count_nonzero((1 - y_pred) * y, axis=0), tf.float32)
    f1 = 2*tp / (2*tp + fn + fp + 1e-16)
    macro_f1 = tf.reduce_mean(f1)
    return macro_f1

def pearson_r(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x, axis=0)
    my = K.mean(y, axis=0)
    xm, ym = x - mx, y - my
    r_num = K.sum(xm * ym)
    x_square_sum = K.sum(xm * xm)
    y_square_sum = K.sum(ym * ym)
    r_den = K.sqrt(x_square_sum * y_square_sum)
    r = r_num / r_den
    return K.mean(r)

def srcc(y_true, y_pred):
     return ( tf.py_function(spearmanr, [tf.cast(y_pred, tf.float32),
                       tf.cast(y_true, tf.float32)], Tout = tf.float32) )

LR = 1e-5 # Keep it small when transfer learning
EPOCHS = 10

from tensorflow.python.keras.layers import Layer
import keras.backend as K

if K.backend() == 'tensorflow':
    import tensorflow as tf

class RoiPoolingConv(Layer):
    def __init__(self, pool_size, num_rois, **kwargs):


        self.pool_size = 2
        self.num_rois = 1

        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):

        self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):
        assert(len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)


        #for roi_idx in range(self.num_rois):

        x = rois[0, 0]
        y = rois[0, 1]
        w = rois[0, 2]
        h = rois[0, 3]

        num_pool_regions = self.pool_size

        x = K.cast(x, 'int32')
        y = K.cast(y, 'int32')
        w = K.cast(w, 'int32')
        h = K.cast(h, 'int32')

        rs = tf.image.resize(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))

        #final_output = K.reshape(rs, (1, self.pool_size, self.pool_size, self.nb_channels))
        final_output = rs
        return final_output


    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))



def build_model():

    #input_shape = (1,7,7,512)

    pooling_regions = 2

    input_rois = layers.Input(shape=(4), name='input_rois')
    input_layer = layers.Input(shape=(IMG_SIZE,IMG_SIZE,CHANNELS), name='input_layer')

    # out_roi_pool.shape = (1, num_rois, channels, pool_size, pool_size)
    # num_rois (4) 7x7 roi pooling
    base_layers = base_model(input_layer, training=False)
    out_roi_pool = RoiPoolingConv(pooling_regions, 1)([base_layers, input_rois])

    # Flatten the convlutional layer and connected to 2 FC and 2 dropout
#     x = layers.TimeDistributed(layers.Flatten(name='flatten'))(out_roi_pool)
#     x = layers.TimeDistributed(layers.Dense(512, activation='relu', name='fc1'))(x)
#     x = layers.TimeDistributed(layers.Dropout(0.2))(x)
#     x = layers.TimeDistributed(layers.Dense(32, activation='relu', name='fc2'))(x)
#     x = layers.TimeDistributed(layers.Dropout(0.2))(x)

    x = layers.Flatten(name='flatten')(out_roi_pool)
    x = layers.Dense(512, activation='relu', name='fc1')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(32, activation='relu', name='fc2')(x)
    x = layers.Dropout(0.2)(x)
    output_1 = layers.Dense(1, name='output_1')(x)
    output_2 = layers.Dense(1, name='output_2')(x)

    model = models.Model(inputs=[input_layer, input_rois], outputs=[output_1, output_2])

    return model

model=build_model()
model.summary()

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss={'output_1': 'mse', 'output_2': 'mse'},
 metrics={'output_1':srcc,
          'output_2':srcc})

lr = float(0.001)
def scheduler(epoch):
    if epoch < 5:
        return lr
    else:
        return lr*tf.math.exp(-0.1)

callback = tf.keras.callbacks.LearningRateScheduler(scheduler)
checkpoint_filepath = "./models/"+model_name+'_checkpoint'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

start = time.time()
history = model.fit(train_ds,
                    epochs=EPOCHS,
                    callbacks=[callback,model_checkpoint_callback],
                    validation_data=val_ds)
print('\nTraining took {}'.format(time.time()-start))

export_path = "./models/"+model_name+".h5"
model.save(export_path)
print("Model was exported in this path: '{}'".format(export_path))

# Get the dictionary containing each metric and the loss for each epoch
history_dict = history.history
history_path = 'fit_history_'+model_name+'.pkl'
with open(history_path, 'wb') as f:
    pickle.dump(history_dict, f, pickle.HIGHEST_PROTOCOL)


y_pred = model.predict(test_ds)
with open('test_img_qual_'+model_name+'.npy', 'wb') as f:
    np.save(f, y_pred[0])
with open('test_patch_qual_'+model_name+'.npy', 'wb') as f:
    np.save(f, y_pred[1])
# qual_pred = y_pred[1]
img_qual_pred = []
for i in range(len(y_pred[0])):
    img_qual_pred.append(y_pred[0][i][0])

patch_qual_pred = []
for i in range(len(y_pred[1])):
    img_qual_pred.append(y_pred[1][i][0])

# print(val_y_qual)
# print(y_qual_pred)

print('Image SRCC: ', spearmanr(test_img_qual, img_qual_pred))
print('Image LCC: ', pearsonr(test_img_qual, img_qual_pred))


Model: "model_10"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_rois (InputLayer)         [(None, 4)]          0                                            
__________________________________________________________________________________________________
tf.__operators__.getitem_92 (Sl ()                   0           input_rois[0][0]                 
__________________________________________________________________________________________________
tf.__operators__.getitem_94 (Sl ()                   0           input_rois[0][0]                 
__________________________________________________________________________________________________
tf.__operators__.getitem_91 (Sl ()                   0           input_rois[0][0]                 
___________________________________________________________________________________________

InvalidArgumentError:  input image must be of non-zero size
	 [[node model_10/tf.image.resize_11/resize/ResizeBilinear (defined at <ipython-input-114-cc202340c714>:331) ]] [Op:__inference_train_function_28770]

Function call stack:
train_function
