# 0 Load dataset

In [None]:
# Mount Google Drive so the project folder is reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
!nvidia-smi

In [None]:
# Project root on the mounted Drive; the 'SEM' dataset folder and the
# 'saved_models' output folder are both resolved relative to it.
PROJECT_path = '/content/drive/MyDrive/IDB_diamond_damage'

In [None]:
import os
import numpy as np
from tqdm import tqdm
from PIL import Image

In [None]:
def load_dataset(dataset_path, crop_size=1024):
  """Load every image below ``dataset_path`` and label it by category folder.

  Each sub-directory of ``dataset_path`` is one category. Categories are
  processed in sorted name order and receive the integer tags 0, 1, 2, ...;
  the files inside a category are read in sorted name order as well.

  Args:
    dataset_path: directory that contains one sub-directory per category.
    crop_size: side length of the top-left square kept from every image
      (default 1024, the previously hard-coded value). Pass ``None`` to keep
      the images uncropped.

  Returns:
    A tuple ``(image_list, label_list)``: float32 arrays with a trailing
    channel axis, and the category tag of each image in the same order.
  """
  image_list = []
  label_list = []
  # Only directories are categories; a stray file next to them would make the
  # os.listdir() call on it fail.
  category_names = sorted(
      name for name in os.listdir(dataset_path)
      if os.path.isdir(os.path.join(dataset_path, name)))
  print(category_names)
  for tag, category in enumerate(category_names):
    category_path = os.path.join(dataset_path, category)
    for file in tqdm(sorted(os.listdir(category_path))):
      file_path = os.path.join(category_path, file)
      # The context manager releases the file handle as soon as the pixel
      # data has been copied into the array.
      with Image.open(file_path) as image:
        img = np.asarray(image, dtype="float32")

      # Choose whether to crop the images, e.g. to 1024*1024.
      if crop_size is not None:
        img = img[0:crop_size, 0:crop_size]

      # Append a channel axis: (H, W) -> (H, W, 1).
      # NOTE(review): assumes the files are single-channel images — confirm.
      img = img[:, :, np.newaxis]
      image_list.append(img)
      label_list.append(tag)
  return image_list, label_list

In [None]:
# Read the images of the 'SEM' folder: X_set holds the image arrays and
# Y_set the integer category tag of each image.
X_set, Y_set = load_dataset(os.path.join(PROJECT_path, 'SEM'))

In [None]:
# Toughness index of samples #01~#65; entry k is the mean target value used for category tag k.
toughness_index = [76.53,	82.50,	92.38,	88.75,	92.30,	81.10,	78.98,	92.48,	89.30,	86.55,	86.23,	92.05,	86.73,	77.00,	76.18,	74.20,	80.38,	91.38,	89.20,	88.08,	78.25,	77.85,	89.45,	82.55,	87.55,	83.63,	87.60,	85.75,	77.18,	75.08,	72.30,	75.28,	83.53,	84.80,	87.78,	80.50,	63.73,	74.88,	75.05,	74.65,	73.98,	79.50,	78.75,	77.45,	79.03,	78.03,	75.25,	72.28,	71.70,	74.98,	77.85,	76.28,	77.75,	75.53,	75.70,	74.85,	80.73,	80.38,	78.85,	81.58,	81.00,	80.65,	82.13,	80.20,	79.15]

# The properties below cover samples #31~#65 only (35 entries each).
relative_density = [108.60,	108.60,	108.60,	108.60,	108.60,	99.66,	94.54,	95.44,	93.71,	87.58,	93.59,	94.62,	90.36,	95.28,	84.82,	89.01,	90.81,	87.69,	88.38,	88.35,	88.07,	90.25,	93.89,	89.23,	92.19,	91.32,	93.84,	94.93,	91.42,	98.09,	96.85,	96.80,	98.00,	96.16,	95.25]
rockwell_hardness = [98.06,	98.06,	98.06,	98.06,	98.06,	96.45,	95.35,	95.30,	104.45,	106.45,	103.50,	100.80,	101.80,	98.80,	97.60,	97.05,	96.90,	98.20,	94.70,	91.50,	99.50,	99.40,	96.45,	99.50,	101.25,	96.10,	95.60,	108.95,	110.70,	96.55,	110.55,	111.75,	92.55,	106.50,	105.10]
bending_strength_loss = [49.32,	53.93,	52.30,	48.24,	48.24,	45.21,	33.43,	33.92,	67.04,	62.50,	73.50,	69.87,	58.25,	73.44,	65.95,	60.14,	65.94,	67.34,	48.74,	55.50,	63.42,	61.80,	61.30,	67.09,	72.16,	65.99,	44.50,	62.41,	62.38,	41.59,	64.31,	64.75,	47.22,	67.64,	69.57]

# Property used as the regression target. The list is indexed by category tag,
# so it needs one entry per category folder.
# NOTE(review): toughness_index has 65 entries while the other three lists have
# 35 (samples #31~#65) — confirm the tag-to-sample alignment before switching.
VALUE = toughness_index

In [None]:
# Output folder for the trained model, figures and prediction tables;
# created on first use.
save_dir = os.path.join(PROJECT_path, 'saved_models')
os.makedirs(save_dir, exist_ok=True)

# 1 Dataset processing

In [None]:
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [None]:
# Per-category mean of the target property. setting_values() draws each
# sample's target from a normal distribution centred on the entry for its label.
mean_value_list = VALUE
def setting_values(row, std=3.0):
  """Draw a synthetic target value for one sample.

  The value is sampled from a normal distribution centred on the entry of the
  module-level ``mean_value_list`` that belongs to the sample's category.

  Args:
    row: category label of the sample; it is converted with ``int()`` and used
      as an index into ``mean_value_list``.
    std: standard deviation of the distribution (default 3.0, the previously
      hard-coded value).

  Returns:
    The drawn value rounded to two decimals.

  Note:
    The draw uses NumPy's global random state, so values only repeat between
    runs when ``np.random.seed`` has been called beforehand.
  """
  mean = mean_value_list[int(row)]
  # A single draw needs no sorting; take its only element directly.
  value = np.random.normal(mean, std, size=1)[0]
  return np.round(value, 2)

In [None]:
def regression_dataset_processX(X_set, target_size=(256, 256)):
  """Turn the loaded grayscale images into one normalised RGB batch.

  Every image is resized to ``target_size``, expanded from one gray channel to
  three identical RGB channels, stacked into a single float32 array and
  divided by 255.

  Args:
    X_set: iterable of grayscale image arrays, as returned by load_dataset.
    target_size: output size handed to ``cv2.resize`` as ``(width, height)``
      (default ``(256, 256)``, the previously hard-coded value).

  Returns:
    float32 array of shape ``(len(X_set), height, width, 3)``.
    NOTE(review): the division by 255 presumably maps 8-bit intensities to
    [0, 1] — confirm the bit depth of the source images.
  """
  rgb_images = [
      cv2.cvtColor(cv2.resize(image, target_size), cv2.COLOR_GRAY2RGB)
      for image in X_set]

  # Build the float32 batch in one step; a separate astype() afterwards would
  # only copy the whole array a second time.
  batch = np.asarray(rgb_images, dtype='float32')
  batch /= 255.0

  return batch

In [None]:
# NOTE: this overwrites X_set with the processed batch, so run the cell only
# once per load; a second run would feed the already converted RGB arrays
# back into the gray-to-RGB conversion.
X_set = regression_dataset_processX(X_set)

In [None]:
# Split images and labels into train and test parts with a fixed seed; no
# test_size is passed, so scikit-learn's default proportion applies.
X_train, X_test, Y_train, Y_test = train_test_split(X_set,Y_set,random_state = 888)

In [None]:
def regression_dataset_processY(Y_set):
  """Build the target table for a list of category labels.

  Args:
    Y_set: sequence of category labels, one per sample.

  Returns:
    DataFrame with the columns 'label' (the input labels) and 'value' (the
    result of setting_values for each label).
  """
  labelled = pd.DataFrame(Y_set)
  labelled.columns = ['label']
  drawn_values = labelled['label'].apply(setting_values)
  return labelled.assign(value=drawn_values)

In [None]:
# Attach a randomly drawn target value to every label of the train and the
# test split; each frame has the columns 'label' and 'value'.
Y_train_pd = regression_dataset_processY(Y_train)
Y_test_pd = regression_dataset_processY(Y_test)

In [None]:
# Preview the first 20 rows of the train and test target tables.
print(Y_train_pd.head(20))
print('-------------------')
print(Y_test_pd.head(20))

In [None]:
# Fit the scaler on the training targets only and reuse it for the test split,
# so both splits share one scale and no test statistics leak into the scaling.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(Y_train_pd)
Y_train = min_max_scaler.transform(Y_train_pd)[:, 1]
# Transform the test frame once: column 0 is the scaled label, column 1 the
# scaled target value.
Y_test_scaled = min_max_scaler.transform(Y_test_pd)
Y_test = Y_test_scaled[:, 1]
Y_test_label = Y_test_scaled[:, 0]

In [None]:
# Show size and content of the scaled train and test target vectors.
print(len(Y_train))
print(Y_train)
print('-------------------')
print(len(Y_test))
print(Y_test)

# 2 VGG16 regression model

## 2.1 [A] model establishment - DIY

In [None]:
# from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation
# from keras.models import Sequential, Model

In [None]:
# model = Sequential()

# # Block_1
# model.add(Conv2D(64, (3, 3), padding = 'same', activation='relu', input_shape=X_train.shape[1:]))
# model.add(Conv2D(64, (3, 3), padding = 'same', activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# # Block_2
# model.add(Conv2D(128, (3, 3), padding = 'same', activation='relu'))
# model.add(Conv2D(128, (3, 3), padding = 'same', activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# # Block_3
# model.add(Conv2D(256, (3, 3), padding = 'same', activation='relu'))
# model.add(Conv2D(256, (3, 3), padding = 'same', activation='relu'))
# model.add(Conv2D(256, (3, 3), padding = 'same', activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# # Block_4
# model.add(Conv2D(512, (3, 3), padding = 'same', activation='relu'))
# model.add(Conv2D(512, (3, 3), padding = 'same', activation='relu'))
# model.add(Conv2D(512, (3, 3), padding = 'same', activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# # Block_5
# model.add(Conv2D(512, (3, 3), padding = 'same', activation='relu'))
# model.add(Conv2D(512, (3, 3), padding = 'same', activation='relu'))
# model.add(Conv2D(512, (3, 3), padding = 'same', activation='relu'))
# model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# # Block_6
# model.add(Flatten())
# model.add(Dense(4096, activation='relu'))
# model.add(Dense(4096, activation='relu'))
# model.add(Dense(1, activation='linear'))

## 2.1 [B] Model establishment - Import

In [None]:
from keras import applications
from keras.layers import Flatten, Dense, Dropout, Activation
from keras.models import Sequential, Model

In [None]:
# VGG16 convolutional backbone with ImageNet weights and without its
# classifier top; the input shape follows the prepared training images.
base_model = applications.vgg16.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=X_train.shape[1:])
print(base_model.output)

# Regression head: flatten the backbone features, one hidden layer with
# dropout, then a single linear output unit.
regression_head = Sequential([
    Flatten(input_shape=base_model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='linear'),
])

# Chain backbone and head into the final model.
model = Model(inputs=base_model.input, outputs=regression_head(base_model.output))

In [None]:
# Transfer learning: freeze every backbone layer so that only the newly added
# head is trained. Skip this cell to train the backbone layers as well.
for backbone_layer in base_model.layers:
  backbone_layer.trainable = False

## 2.2 Model compiling

In [None]:
from keras.models import Model

In [None]:
# Choose an appropriate optimizer and loss function.
# Mean absolute error is tracked instead of 'accuracy': the model predicts a
# continuous value with an MSE loss, for which a classification accuracy
# carries no information.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), 
       loss='mse', 
       metrics=['mae'])

## 2.3 Model training



In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Training hyper-parameters.
epochs = 100
batch_size = 10

# Switch for real-time data augmentation during training.
data_augmentation = True

# Switch for early stopping. When enabled, an EarlyStopping callback that
# monitors 'val_loss' with a patience of 3 is passed to training; otherwise
# no callbacks are used.
early_stopping = False
callbacks = [EarlyStopping(monitor='val_loss', patience=3)] if early_stopping else None

In [None]:
# Train on the raw arrays, or on batches that are augmented on the fly.
# Either way the test split serves as validation data after every epoch.
if not data_augmentation:
  print('Not using data augmentation.')
  history = model.fit(X_train, Y_train, 
             batch_size=batch_size, 
             epochs=epochs, 
             validation_data=(X_test, Y_test), 
             shuffle=True, 
             callbacks=callbacks)
else:
  print('Using real-time data augmentation.')
  # Active augmentations: horizontal/vertical shifts of up to 10 % and random
  # horizontal/vertical flips. All other options are left switched off.
  datagen = ImageDataGenerator(featurewise_center=False,  
                  samplewise_center=False,  
                  featurewise_std_normalization=False,  
                  samplewise_std_normalization=False, 
                  zca_whitening=False, 
                  zca_epsilon=1e-06, 
                  rotation_range=0, 
                  width_shift_range=0.1,
                  height_shift_range=0.1,
                  shear_range=0., 
                  zoom_range=0.,
                  channel_shift_range=0., 
                  fill_mode='nearest',
                  cval=0., 
                  horizontal_flip=True, 
                  vertical_flip=True, 
                  rescale=None,
                  preprocessing_function=None,
                  data_format=None,
                  validation_split=0.0)
  # datagen.fit(X_train) is intentionally not called: it only computes
  # statistics for featurewise centering/normalisation and ZCA whitening,
  # which are all disabled above, and it would copy the whole training set.
  # Model.fit accepts the generator directly; fit_generator is deprecated.
  history = model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size),  
             epochs=epochs, 
             steps_per_epoch=X_train.shape[0]//batch_size, 
             validation_data=(X_test, Y_test), 
             workers=10,
             callbacks=callbacks)

## 2.4 Model preservation

In [None]:
import os
from keras.models import Model

In [None]:
# Save the trained model as 'regression_model.h5' inside save_dir, then print
# the layer summary and the location of the saved file.
model_path = os.path.join(save_dir, "regression_model.h5")
model.save(model_path)
model.summary()
print('Regression model saved at %s ' % model_path)

## 2.5 Training curves

In [None]:
import os
import matplotlib.pyplot as plt

In [None]:
# Plot Loss-Epoch: training and validation loss per epoch, saved to save_dir.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('Model Loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train set', 'Validation set'], loc='upper left')
fig.savefig(os.path.join(save_dir, 'model_loss.png'))
plt.show()

## 2.6 Model prediction

In [None]:
from keras.models import Model

In [None]:
# Predict the scaled target of every test image; the model returns one value
# per image as an (n, 1) array.
Y_test_pred = model.predict(X_test)

# The model was trained on targets scaled with the statistics of the training
# frame, so its outputs are mapped back with a scaler fitted on that same
# frame. A dedicated scaler on the untouched Y_train_pd keeps this cell
# re-runnable: it does not depend on Y_test_pd, which gains columns below.
value_scaler = MinMaxScaler().fit(Y_train_pd[['value']])
value_pred = value_scaler.inverse_transform(
    np.asarray(Y_test_pred).reshape(-1, 1)).ravel()

# De-normalised predictions next to the label they belong to. The label is
# not predicted by the model; it is carried over from the test frame.
Y_test_pred_pd2 = pd.DataFrame({'label': Y_test_pd['label'].astype('float64'),
                                'value': value_pred})
Y_test_pd['label_pred'] = Y_test_pred_pd2['label']
Y_test_pd['value_pred'] = Y_test_pred_pd2['value']
Y_test_pd.to_csv(os.path.join(save_dir, 'regression_prediction_test.csv'))
print(Y_test_pd.head(20))

## 2.7 Feature maps

In [None]:
import os
import keras
import numpy as np

In [None]:
# Output folders for the feature maps: one for the individual channel images,
# one for the per-layer overview plots. Both are created when missing.
visual_folder = os.path.join(save_dir, "regression_model_visual")
visual_plot_folder = os.path.join(save_dir, "regression_model_visual_plot")

for output_folder in (visual_folder, visual_plot_folder):
  os.makedirs(output_folder, exist_ok=True)

In [None]:
def visual(model, data, num_layer):
  """Save and show the feature maps one layer produces for a single image.

  Every channel of the layer output is written as an individual PNG into the
  module-level ``visual_folder``; an overview grid of all channels is written
  into ``visual_plot_folder`` and displayed.

  Args:
    model: Keras model whose first layer provides the input tensor.
    data: one input image without batch axis; the batch axis is added here.
    num_layer: index into ``model.layers`` of the layer to visualise. Its
      output is indexed as (batch, height, width, channel).
  """
  data = np.expand_dims(data, axis=0) 
  layer = keras.backend.function([model.layers[0].input], [model.layers[num_layer].output])
  f1 = layer([data])[0]
  num = f1.shape[-1]
  # Smallest square grid that holds all channels. plt.subplot() needs integer
  # grid dimensions, while np.ceil() returns a float.
  grid = int(np.ceil(np.sqrt(num)))
  fig = plt.figure(figsize=(8, 8))
  print("saving images in layer_"+str(num_layer)+" ...")
  for i in range(num):
    plt.subplot(grid, grid, i+1)
    layer_img = f1[0, :, :, i] * 255
    plt.imshow(layer_img, cmap='gray')
    plt.axis('off')
    visual_single_name = "layer_"+str(num_layer)+"_img_"+str(i)+".png"
    layer_single_path = os.path.join(visual_folder, visual_single_name)
    cv2.imwrite(layer_single_path, layer_img)
  layer_all_name = "layer_"+str(num_layer)+".png"
  layer_all_path = os.path.join(visual_plot_folder, layer_all_name)
  plt.savefig(layer_all_path, dpi=350)
  plt.show()
  # Release the figure; the function is called once per layer and open
  # figures would otherwise pile up in memory.
  plt.close(fig)

In [None]:
# Export the feature maps of the layers with index 0..18 for the first test image.
# NOTE(review): 19 presumably equals the number of backbone layers — confirm
# when the architecture changes.
for layer_index in range(19):
  visual(model, X_test[0], layer_index)
print("All layer images saved!")