<a href="https://colab.research.google.com/github/naufalhisyam/TurbidityPrediction-thesis/blob/main/train_model_DenseNet121_CV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
!pip install tensorflow-addons
import tensorflow_addons as tfa
from sklearn.model_selection import KFold, train_test_split

In [None]:
!git clone https://github.com/naufalhisyam/TurbidityPrediction-thesis.git
os.chdir('/content/TurbidityPrediction-thesis') 

In [None]:
# Read the dataset index (one row per image) and hold out 10% of the rows as
# the test set. The fixed random_state makes the shuffled split repeatable.
images = pd.read_csv(r'./Datasets/0degree_lowrange/0degInfo.csv')
train_df, test_df = train_test_split(
    images,
    train_size=0.9,
    shuffle=True,
    random_state=1,
)

# Single-column frame of training targets; passed to KFold.split later on.
Y = train_df.loc[:, ['Turbidity']]

In [None]:
# Per-fold validation metrics; the cross-validation loop appends one value
# per fold to each list.
VALIDATION_R2 = []
VALIDATION_LOSS = []
VALIDATION_MSE = []
VALIDATION_MAE = []

# Experiment label, reused for the checkpoint folder and for plot titles and
# output filenames.
name = 'DenseNet_0deg_withTL'
save_dir = f'saved_models/{name}'
# exist_ok replaces the separate exists()-then-create check, so the cell can
# be re-run safely and there is no gap between the check and the creation.
os.makedirs(save_dir, exist_ok=True)

In [None]:
def get_model():
    """Build and compile a DenseNet121 regressor with a single linear output.

    The backbone is ``tf.keras.applications.DenseNet121`` without its
    classification head, initialised with ImageNet weights, taking
    224x224x3 inputs and ending in global average pooling. One linear
    ``Dense(1)`` unit on top produces the prediction.

    The model is returned already compiled (Huber loss, Adam with
    ``learning_rate=1e-4`` and ``decay=1e-6``, metrics ``mae``, ``mse``
    and ``R2`` in that order). Without this, an instance that only had
    weights loaded into it cannot be passed to ``model.evaluate``, which
    requires a compiled model. Callers may call ``compile`` again to
    override these settings.

    Returns:
        tf.keras.Model: the compiled model.
    """
    #Create model
    backbone = tf.keras.applications.DenseNet121(
        include_top=False,
        weights='imagenet',
        input_shape=(224, 224, 3),
        pooling='avg',
    )
    prediction = tf.keras.layers.Dense(1, activation="linear")(backbone.output)
    model = tf.keras.Model(inputs=backbone.input, outputs=prediction)

    #Compile the model
    model.compile(
        loss=tf.keras.losses.Huber(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4, decay=1e-6),
        metrics=['mae', 'mse', tfa.metrics.RSquare(name="R2")],
    )

    return model

def get_model_name(k):
    """Return the checkpoint filename for fold ``k``, e.g. ``densenet_3.h5``."""
    return f'densenet_{k}.h5'

# Cell output: the GPU device name reported by TensorFlow, shown as a quick
# check of which accelerator (if any) the runtime exposes.
tf.test.gpu_device_name()

In [None]:
# Training-time generator: random horizontal flips as augmentation.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True
)

# Held-out data must not be augmented. With random flips each pass over the
# test set sees different inputs, so predict() and evaluate() would disagree
# and the reported scores would change from run to run.
test_generator = tf.keras.preprocessing.image.ImageDataGenerator()

# 5-fold cross-validation splitter; fold_var numbers the folds starting at 1
# and is used to build each fold's checkpoint filename.
kf = KFold(n_splits = 5)
fold_var = 1

In [None]:
# Start from a clean slate so that re-running this cell neither continues the
# fold numbering from the previous run nor stacks duplicate metric entries.
fold_var = 1
VALIDATION_R2.clear()
VALIDATION_LOSS.clear()
VALIDATION_MSE.clear()
VALIDATION_MAE.clear()

# Validation images are read without augmentation and in a fixed order, so the
# val_loss that drives checkpoint selection is computed on the same inputs at
# every epoch and again in the final evaluate() call.
val_generator = tf.keras.preprocessing.image.ImageDataGenerator()

for train_index, val_index in kf.split(np.zeros(Y.shape[0]), Y):
    training_data = train_df.iloc[train_index]
    validation_data = train_df.iloc[val_index]

    train_images = train_generator.flow_from_dataframe(
        training_data,
        x_col="Filepath", y_col="Turbidity",
        target_size=(224, 224), color_mode='rgb',
        class_mode="raw", shuffle=True)
    val_images = val_generator.flow_from_dataframe(
        validation_data,
        x_col="Filepath", y_col="Turbidity",
        target_size=(224, 224), color_mode='rgb',
        class_mode="raw", shuffle=False)

    # CREATE NEW MODEL (fresh weights for every fold)
    model = get_model()
    # COMPILE NEW MODEL
    opt = tf.keras.optimizers.Adam(learning_rate=1e-4, decay=1e-6)
    model.compile(loss=tf.keras.losses.Huber(), optimizer=opt,
                  metrics=['mae', 'mse', tfa.metrics.RSquare(name="R2")])

    # CREATE CALLBACKS
    # Only the checkpoint callback is used here; it depends on the fold number
    # through the filename and keeps the epoch with the lowest val_loss.
    checkpoint_filepath = f'{save_dir}/{get_model_name(fold_var)}'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [checkpoint]

    # FIT THE MODEL
    history = model.fit(train_images, epochs=100,
                        callbacks=callbacks_list,
                        validation_data=val_images)

    # LOAD BEST MODEL to evaluate the performance of the model.
    # Reuse the checkpoint path rather than rebuilding it by hand, so the file
    # that was written and the file that is read cannot drift apart.
    model.load_weights(checkpoint_filepath)

    results = dict(zip(model.metrics_names, model.evaluate(val_images)))

    VALIDATION_R2.append(results['R2'])
    VALIDATION_MAE.append(results['mae'])
    VALIDATION_MSE.append(results['mse'])
    VALIDATION_LOSS.append(results['loss'])

    # Drop the Keras global state before the next fold builds a new model.
    tf.keras.backend.clear_session()

    fold_var += 1

In [None]:
# Evaluation passes must see the images exactly as stored. A generator with
# random flips would feed different inputs to predict() and evaluate(), so the
# residuals and the reported scores would not describe the same predictions.
eval_generator = tf.keras.preprocessing.image.ImageDataGenerator()

# shuffle=False keeps the batch order aligned with the iterator's `.labels`,
# which are later compared element-wise with the predictions.
train_images = eval_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Turbidity',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='raw',
    shuffle=False,
)

test_images = eval_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Turbidity',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='raw',
    shuffle=False,
)

In [None]:
# Pick the fold with the lowest validation loss. Folds are numbered from 1 in
# the checkpoint filenames, hence the offset on the 0-based list position.
min_fold = 1 + min(
    range(len(VALIDATION_LOSS)),
    key=lambda fold_idx: VALIDATION_LOSS[fold_idx],
)

# Rebuild the architecture and restore that fold's saved weights.
model = get_model()
model.load_weights(f"{save_dir}/{get_model_name(min_fold)}")

In [None]:
# Test split: labels in iterator order, predictions with singleton dimensions
# squeezed out, and residuals defined as (true - predicted).
test_true = test_images.labels
test_pred = model.predict(test_images).squeeze()
test_residuals = test_true - test_pred

# Same quantities for the training split.
train_true = train_images.labels
train_pred = model.predict(train_images).squeeze()
train_residuals = train_true - train_pred

# evaluate() results for each split; later cells read index 3 as R^2.
train_score = model.evaluate(train_images)
test_score = model.evaluate(test_images)
print('test  ', test_score)
print('train  ', train_score)

In [None]:
# Two panels sharing a figure: residuals against predictions on the left
# (4/5 of the width) and the residual distributions on the right.
f, axs = plt.subplots(1, 2, figsize=(8,6), gridspec_kw={'width_ratios': [4, 1]})
scatter_ax, hist_ax = axs

f.suptitle(f'Residual Plot - {name}', fontsize=13, fontweight='bold', y=0.92)

# Left panel: residual vs. predicted value, train drawn first, test on top.
scatter_ax.scatter(train_pred, train_residuals,
                   label='Train Set', alpha=0.75, color='tab:blue')
scatter_ax.scatter(test_pred, test_residuals,
                   label='Test Set', alpha=0.75, color='tab:orange')
scatter_ax.set(ylabel='Residual (NTU)', xlabel='Predicted Turbidity (NTU)')
scatter_ax.axhline(0, color='black')
scatter_ax.legend()
scatter_ax.grid()

# Right panel: horizontal density histograms of the same residuals.
hist_ax.hist(train_residuals, bins=50, orientation="horizontal",
             density=True, alpha=0.9, color='tab:blue')
hist_ax.hist(test_residuals, bins=50, orientation="horizontal",
             density=True, alpha=0.75, color='tab:orange')
hist_ax.axhline(0, color='black')
hist_ax.set(xlabel='Distribution')
hist_ax.yaxis.tick_right()
hist_ax.grid(axis='y')

plt.subplots_adjust(wspace=0.05)

# Save before show(): the figure is written to the experiment folder.
plt.savefig(f'{save_dir}/residualPlot_{name}.png', dpi=150)
plt.show()

In [None]:
def _plot_pred_vs_true(axis, y_true, y_pred, r2, set_name, color):
    """Draw one predicted-vs-measured panel on ``axis``.

    Args:
        axis: matplotlib Axes to draw on.
        y_true: 1-D array of measured values (x axis).
        y_pred: 1-D array of predicted values (y axis).
        r2: R^2 score shown in the scatter legend, rounded to 3 decimals.
        set_name: split label used in the legend and the panel title
            (e.g. ``'Test'``).
        color: colour of the scatter points.
    """
    # Raw f-string: `\ ` is a mathtext space, not a Python escape sequence.
    axis.scatter(y_true, y_pred, label=rf'${set_name}\ R^2=${round(r2, 3)}',
                 color=color, alpha=0.75)

    x_ends = np.array([y_true.min(), y_true.max()])
    # Evaluate the fitted line at the two x endpoints. Pairing x_min/x_max
    # with the min/max of the fitted values draws the wrong line whenever the
    # slope is negative.
    slope, intercept = np.polyfit(y_true, y_pred, 1)
    axis.plot(x_ends, slope * x_ends + intercept, 'k--', lw=2, label='best fit')
    # Line style only in the fmt string: a colour in fmt would clash with the
    # explicit color keyword.
    axis.plot(x_ends, x_ends, '--', lw=2, label='identity', color='dimgray')

    axis.set_xlabel('Measured Turbidity (NTU)')
    axis.set_ylabel('Predicted Turbidity (NTU)')
    axis.set_title(f'{set_name} Set', fontsize=10, fontweight='bold')
    axis.set_xlim([0, 130])
    axis.set_ylim([0, 130])
    axis.grid()
    axis.legend()


fig, ax = plt.subplots(1,2,figsize=(13,6))
fig.suptitle(f'Nilai Prediksi vs Observasi - {name}', fontsize=13, fontweight='bold',  y=0.96)

# Index 3 of the evaluate() output is the R2 metric (after loss, mae, mse).
_plot_pred_vs_true(ax[0], test_true, test_pred, test_score[3], 'Test', 'tab:orange')
_plot_pred_vs_true(ax[1], train_true, train_pred, train_score[3], 'Train', 'tab:blue')

plt.savefig(f'{save_dir}/predErrorPlot_{name}.png', dpi=150)
plt.show()

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

save_path = f"/content/gdrive/MyDrive/MODEL BERHASIL/DenseNet/{name}"
if not os.path.exists(save_path):
  os.makedirs(save_path)

oripath = "saved_models/."
!cp -a "{oripath}" "{save_path}" # copies files to google drive