In [None]:
import os

# TF_CPP_MIN_LOG_LEVEL is read when TensorFlow's native runtime is loaded, so it
# must be set before the first `import tensorflow` to have any effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import random
import shutil
import warnings

import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np  # was commented out; `np` is used by the data generator and the evaluation cells
import pandas as pd
import tensorflow as tf
from keras_tuner.tuners import RandomSearch
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.utils import Sequence

warnings.filterwarnings("ignore")

# Seed every RNG the notebook draws from: Python's `random`, NumPy's global RNG
# (used by DataFrame.sample and by the image augmentation) and TensorFlow.
SEED = 20
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [None]:
import tensorflow as tf

# Report how many GPUs TensorFlow can see from this kernel.
visible_gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

## Read data

In [None]:
# ====================== data ======================
# Label table read from the Excel sheet; later cells match its rows to image
# files through the `name` column.
labels = pd.read_excel('data.xlsx')

# Regression targets the network is trained to predict.
label_list2 = ['u', 'PCF', 'EA', 'SEA', 'k']

# Rescale every target column with a min-max scaler. `ss` is kept so that
# predictions can be mapped back to the original units with inverse_transform.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split — confirm this is intended.
ss = MinMaxScaler().fit(labels[label_list2])
labels[label_list2] = ss.transform(labels[label_list2])

In [None]:
# Preview the scaled label table; the first rows are enough to check the columns.
labels.head()

## Create training and testing data – run once only

In [None]:

# image_folder = "./images"
# train_folder = './train'
# test_folder = './test'

# if os.path.exists(train_folder): shutil.rmtree(train_folder)
# if os.path.exists(test_folder): shutil.rmtree(test_folder)
# os.makedirs(train_folder, exist_ok=True)
# os.makedirs(test_folder, exist_ok=True)

# image_files = [i for i in os.listdir(image_folder) if i.endswith('jpg')]
# random.shuffle(image_files)
# split_idx = int(0.7 * len(image_files))
# train_images = image_files[:split_idx]
# test_images = image_files[split_idx:]

# for img in train_images:
#     shutil.copy(os.path.join(image_folder, img), os.path.join(train_folder, img))
# for img in test_images:
#     shutil.copy(os.path.join(image_folder, img), os.path.join(test_folder, img))

# train_csv = labels[labels.name.isin(train_images)]
# test_csv = labels[labels.name.isin(test_images)]

In [None]:
# Consistent with multimodal Xception
train_folder = './train'
test_folder = './test'


def _jpg_names(folder):
    """Return the file names in `folder` that end with '.jpg'."""
    return [entry for entry in os.listdir(folder) if entry.endswith('.jpg')]


# The split is defined by which images already sit in each folder.
train_images = _jpg_names(train_folder)
test_images = _jpg_names(test_folder)

# Keep only the label rows whose `name` matches an image in the respective folder.
train_csv = labels[labels.name.isin(train_images)]
test_csv = labels[labels.name.isin(test_images)]


## Load dataset

In [None]:
# ====================== image only ======================
class ImageDataGeneratorOnlyImage(Sequence):
    """Batch loader that feeds image files and their regression targets to Keras.

    Each batch is a tuple ``(X, y)`` where ``X`` has shape
    ``(n, *target_size, 3)`` and ``y`` has shape ``(n, len(label_list))``.
    Pixel values are passed through unscaled (no ``/255``); any normalisation
    is left to the consumer of the batches.

    Args:
        csv_file: DataFrame with a ``name`` column holding the image file name
            and one column per entry of ``label_list``.
        directory: Folder that contains the image files.
        batch_size: Maximum number of samples per batch.
        target_size: ``(height, width)`` every image is resized to on load.
        label_list: Names of the target columns, in output order.
        shuffle: Reshuffle the rows whenever ``on_epoch_end`` is called.
        augment: Apply a random geometric transform to every loaded image.
    """

    def __init__(self, csv_file, directory, batch_size, target_size, label_list, shuffle=True, augment=False):
        # Newer Keras versions expect Sequence subclasses to initialise the base class.
        super().__init__()
        self.csv_file = csv_file.reset_index(drop=True)
        self.directory = directory
        self.batch_size = batch_size
        self.target_size = target_size
        self.label_list = label_list
        self.shuffle = shuffle
        self.augment = augment

        self.image_data_generator = ImageDataGenerator(
            rotation_range=20, width_shift_range=0.2, height_shift_range=0.2,
            shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode='nearest'
        ) if self.augment else ImageDataGenerator()

        # Shuffle once up front so the very first epoch is already randomised.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a trailing partial batch."""
        # ceil instead of floor: the last, smaller batch must not be dropped,
        # otherwise some samples are never trained on or evaluated and a
        # dataset smaller than one batch would report zero batches.
        return int(np.ceil(len(self.csv_file) / self.batch_size))

    def __getitem__(self, index):
        """Load batch number ``index`` and return it as ``(X, y)`` arrays."""
        batch = self.csv_file.iloc[index * self.batch_size:(index + 1) * self.batch_size]
        # Buffers are sized from the actual slice so a short final batch works.
        # float32 matches what img_to_array produces and halves the memory of
        # the default float64 buffer.
        X = np.empty((len(batch), *self.target_size, 3), dtype=np.float32)
        y = np.empty((len(batch), len(self.label_list)))

        for i, data in enumerate(batch.itertuples()):
            img_path = os.path.join(self.directory, data.name)
            image = img_to_array(load_img(img_path, target_size=self.target_size))

            if self.augment:
                image = self.image_data_generator.random_transform(image)
            X[i] = image
            # Targets are read by attribute, so the column names in label_list
            # have to be valid Python identifiers.
            y[i] = np.array([getattr(data, col) for col in self.label_list])

        return X, y

    def on_epoch_end(self):
        """Reshuffle the rows when shuffling is enabled."""
        if self.shuffle:
            self.csv_file = self.csv_file.sample(frac=1).reset_index(drop=True)

In [None]:

batch_size = 32
target_size = (256, 256)

# Training batches: shuffling and random augmentation enabled.
train_generator = ImageDataGeneratorOnlyImage(
    csv_file=train_csv,
    directory=train_folder,
    batch_size=batch_size,
    target_size=target_size,
    label_list=label_list2,
    shuffle=True,
    augment=True,
)

# Evaluation batches: fixed row order and no augmentation.
test_generator = ImageDataGeneratorOnlyImage(
    csv_file=test_csv,
    directory=test_folder,
    batch_size=batch_size,
    target_size=target_size,
    label_list=label_list2,
    shuffle=False,
)


In [None]:
import tensorflow as tf

# The returned device name is not used; the call is kept as in the original cell.
tf.test.gpu_device_name()

# Print one line per GPU that TensorFlow can see.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for device in physical_devices:
    print(device)

In [None]:
def plot_model_history(model_history,model_name):
    """Plot train/validation MSE and loss side by side and save the figure.

    Args:
        model_history: Object whose ``history`` mapping holds the per-epoch
            lists ``'mse'``, ``'val_mse'``, ``'loss'`` and ``'val_loss'``.
        model_name: Prefix of the output file; the figure is written to
            ``<model_name>_loss.jpg``.
    """
    fig, axs = plt.subplots(1, 2, figsize=(12, 4), dpi=120)
    history = model_history.history

    def _draw_panel(ax, train_key, val_key, title, ylabel):
        # Epochs are numbered from 1 on the x axis.
        for key in (train_key, val_key):
            series = history[key]
            ax.plot(range(1, len(series) + 1), series)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Epoch')
        ax.legend(['train', 'val'], loc='best')

    # Left panel: MSE metric; right panel: training loss.
    _draw_panel(axs[0], 'mse', 'val_mse', 'Model mse', 'mse')
    _draw_panel(axs[1], 'loss', 'val_loss', 'Model Loss', 'Loss')

    fig.savefig(model_name + '_loss.jpg', dpi=600)
    plt.show()

In [None]:
import numpy as np

def build_model(hp):
    """Build and compile the Xception-based regression model for one tuner trial.

    The network input is sized from the notebook-level ``target_size`` so it
    always matches the images produced by the data generators (a hard-coded
    299x299 input rejects batches of any other size).

    Args:
        hp: keras_tuner ``HyperParameters`` object. Searched values are the
            widths of the three dense layers (``units_0``..``units_2``) and the
            Adam learning rate (``lr``).

    Returns:
        A compiled Keras ``Model`` with one linear output per entry of
        ``label_list2``, trained with MSE loss.
    """
    from tensorflow.keras.applications.xception import Xception, preprocess_input

    # Single source of truth for the image size: the same `target_size` the
    # generators resize to. With include_top=False Xception accepts any input
    # of at least 71x71 pixels.
    input_shape = (*target_size, 3)
    input_image = Input(shape=input_shape)

    # Official preprocessing ([-1, 1]); must match generator without /255.0
    x_in = tf.keras.layers.Lambda(preprocess_input, name='xcep_pre')(input_image)

    # Xception backbone without the classification head; global average
    # pooling yields one feature vector per image.
    base_model = Xception(weights='imagenet', include_top=False, input_shape=input_shape, pooling='avg')

    # Freeze only the BatchNormalization layers so small batches cannot corrupt
    # their statistics; the backbone is also called in inference mode.
    for l in base_model.layers:
        if isinstance(l, tf.keras.layers.BatchNormalization):
            l.trainable = False
    x = base_model(x_in, training=False)   # BN uses inference statistics

    # Three fully connected layers: relu -> tanh -> relu (units are searched).
    x = Dense(hp.Int('units_0', 96, 512, step=32), activation='relu')(x)
    x = Dense(hp.Int('units_1', 96, 512, step=32), activation='tanh')(x)
    x = Dense(hp.Int('units_2', 96, 512, step=32), activation='relu')(x)

    # Linear regression head, one unit per target.
    output = Dense(len(label_list2))(x)
    model = Model(inputs=input_image, outputs=output)

    # Learning rate is sampled on a log scale between 3e-5 and 1e-3.
    lr = hp.Float('lr', min_value=3e-5, max_value=1e-3, sampling='LOG')
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse', metrics=['mse'])
    return model


# ====================== Hyperparameter Search ======================
# Random search over the hyperparameters exposed by build_model: 8 trials,
# each trained once for 10 epochs and ranked by val_loss.
# NOTE(review): keras_tuner may reload earlier results found under
# my_dir/xception_tuning_no_porosity unless overwrite=True is passed — confirm
# stale trials are not being reused.
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=8,
    executions_per_trial=1,
    directory='my_dir',
    project_name='xception_tuning_no_porosity'
)

# NOTE(review): the test generator is used as validation data here, so the
# hyperparameters are selected on the same data the final metrics are later
# reported on — confirm a separate validation split is not required.
tuner.search(train_generator, epochs=10, validation_data=test_generator)
# Best configuration found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Activations are fixed in build_model; only units and learning rate are searched.
print("Best hyperparameters:")
print(f"Learning rate: {best_hps.get('lr')}")
print(f"Layer 1 units: {best_hps.get('units_0')} Activation: relu")
print(f"Layer 2 units: {best_hps.get('units_1')} Activation: tanh")
print(f"Layer 3 units: {best_hps.get('units_2')} Activation: relu")


In [None]:

# Build a fresh, untrained model from the best hyperparameters of the search.
model = tuner.hypermodel.build(best_hps)

# Earlier weights-only checkpoint variant, kept for reference:
# checkpoint = tf.keras.callbacks.ModelCheckpoint("./model.ckpt", monitor='val_loss', verbose=1,
#                                                 save_best_only=True, mode='min', save_weights_only=True)

# early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

# Save the full model (not only weights) to best_model.h5 whenever val_loss improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "best_model.h5", monitor='val_loss', verbose=1, save_best_only=True, mode='min', save_weights_only=False
)
# Stop after 10 epochs without val_loss improvement and restore the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)



In [None]:
# Final training run: up to 100 epochs, cut short by early stopping; the best
# model by val_loss is written to disk by the checkpoint callback.
# NOTE(review): validation_data is the test generator, so early stopping and
# checkpoint selection see the data later used for the reported metrics —
# confirm this is acceptable.
model_info = model.fit(
    train_generator,
    # One pass over every batch the generator reports per epoch.
    steps_per_epoch=len(train_generator),
    epochs=100,
    validation_data=test_generator,
    validation_steps=len(test_generator),
    callbacks=[checkpoint, early_stopping]
)

In [None]:

def plot_model_history(model_history, model_name):
    """Plot train/validation MSE and loss per epoch and save the figure.

    This definition replaces the ``plot_model_history`` defined in an earlier
    cell. Epochs are numbered from 1 so the x axis agrees with the ``epoch``
    column exported to ``loss_curve_data.xlsx``, and both axes are labelled.

    Args:
        model_history: Object whose ``history`` mapping holds the per-epoch
            lists ``'mse'``, ``'val_mse'``, ``'loss'`` and ``'val_loss'``.
        model_name: Prefix of the output file; the figure is written to
            ``<model_name>_loss.jpg``.
    """
    fig, axs = plt.subplots(1, 2, figsize=(12, 4), dpi=120)
    history = model_history.history

    # (axes, [(history key, legend label), ...], title, y-axis label)
    panels = (
        (axs[0], (('mse', 'Train MSE'), ('val_mse', 'Val MSE')), 'Model MSE', 'MSE'),
        (axs[1], (('loss', 'Train Loss'), ('val_loss', 'Val Loss')), 'Model Loss', 'Loss'),
    )
    for ax, curves, title, ylabel in panels:
        for key, label in curves:
            values = history[key]
            # Explicit 1-based epoch numbers instead of matplotlib's 0-based index.
            ax.plot(range(1, len(values) + 1), values, label=label)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend()

    fig.savefig(model_name + '_loss.jpg', dpi=600)
    plt.show()

# Draw and save the training curves of the final run.
plot_model_history(model_info, 'Xception_no_porosity')

# Export the same curves as a table, one row per epoch (numbered from 1).
hist = model_info.history
loss_df = pd.DataFrame(
    dict(
        epoch=np.arange(1, len(hist['loss']) + 1),
        train_loss=hist['loss'],
        val_loss=hist['val_loss'],
        # Fall back to the long metric name in case the history uses it.
        train_mse=hist.get('mse', hist.get('mean_squared_error')),
        val_mse=hist.get('val_mse', hist.get('val_mean_squared_error')),
    )
)
loss_df.to_excel('loss_curve_data.xlsx', index=False)

print("loss data to loss_curve_data.xlsx")

In [None]:

import matplotlib
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Reload the model saved by the checkpoint callback during training.
model = tf.keras.models.load_model("best_model.h5")

# Collect the ground truth batch by batch through explicit indexing. Iterating
# the Sequence directly depends on iterator behaviour that differs between
# Keras versions and may not stop at len(test_generator).
y_test_true = np.vstack([test_generator[i][1] for i in range(len(test_generator))])
y_test_pred = model.predict(test_generator)

# Truth and predictions must line up row for row before they are compared.
assert y_test_true.shape == y_test_pred.shape, (
    f"shape mismatch: true {y_test_true.shape} vs predicted {y_test_pred.shape}"
)

# Map both back from the scaled space to the original target units.
y_test_true = ss.inverse_transform(y_test_true)
y_test_pred = ss.inverse_transform(y_test_pred)

# Per-target regression metrics in original units.
for i, label in enumerate(label_list2):
    mse = mean_squared_error(y_test_true[:, i], y_test_pred[:, i])
    mae = mean_absolute_error(y_test_true[:, i], y_test_pred[:, i])
    r2 = r2_score(y_test_true[:, i], y_test_pred[:, i])
    print(f"{label} -> MSE:{mse:.4f}  MAE:{mae:.4f}  R2:{r2:.4f}")

# One predicted-vs-true scatter panel per target.
matplotlib.rcParams['axes.unicode_minus'] = False
color_list = ['red', 'blue', 'purple', 'orange', 'green']
plt.figure(figsize=(12, 10), dpi=120)
for i, label in enumerate(label_list2):
    plt.subplot(3, 3, i+1)
    plt.scatter(y_test_pred[:, i], y_test_true[:, i], color=color_list[i])
    plt.title(label)
    plt.xlabel('Predict')
    plt.ylabel('True')
# Lay out the panels once, after all of them exist.
plt.tight_layout()
plt.savefig('no_porosity.jpg', dpi=600, bbox_inches='tight')
plt.show()

# Write true and predicted values to one Excel sheet per target.
with pd.ExcelWriter('true_vs_predicted_comparison_no_porosity.xlsx') as writer:
    for i, label in enumerate(label_list2):
        df_comparison = pd.DataFrame({
            'True_Value': y_test_true[:, i].ravel(),
            'Predicted_Value': y_test_pred[:, i].ravel()
        })
        df_comparison.to_excel(writer, sheet_name=label, index=False)

# Persist the fitted scaler so later predictions can be un-scaled the same way.
joblib.dump(ss, 'scaler_no_porosity.pkl')
print("Model and scaler saved successfully!")