In [1]:
import pandas as pd
import keras
from keras.models import Sequential,Model
from keras.layers import Dense, Dropout, BatchNormalization,Input,LeakyReLU
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils import np_utils
import keras.backend as Kr
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from numpy import exp
import matplotlib.pyplot as plt
import matplotlib;matplotlib.rcParams['figure.figsize'] = (0.5,0.4)
import pylab 
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tqdm.keras import TqdmCallback

# Number of simulated datasets processed by Tgh_Deepkriging; the loop runs
# sim = 0 .. num_sim-1 and reads/writes the files numbered sim+1.
num_sim = 100


def model_function(df_train, phi, dummy_y, num_class, sim_iteration):
    """Fit the dense softmax classifier for one simulation and return the best checkpoint.

    The network is trained on the basis-function features ``phi`` against the
    one-hot targets ``dummy_y`` with balanced class weights. The model with the
    highest ``val_accuracy`` is checkpointed to ``indicator_kriging.h5`` during
    training and reloaded from that file before being returned. The training and
    validation loss curves are plotted as a side effect.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training frame. Kept for backward compatibility with existing callers;
        the class weights are derived from ``dummy_y`` (the targets actually
        being fitted) so that weight keys always match the label indices.
    phi : numpy.ndarray, shape (n_samples, n_features)
        Input features; ``phi.shape[1]`` sets the input width of the network.
    dummy_y : numpy.ndarray, shape (n_samples, num_class)
        One-hot encoded class labels.
    num_class : int
        Number of output units of the softmax layer.
    sim_iteration : int
        Zero-based simulation index (only used in the progress message, shown 1-based).

    Returns
    -------
    keras.Model
        The model reloaded from the best checkpoint written during training.
    """
    print("##### Warning messages ######")

    # Recover the integer labels from the one-hot targets so that the
    # class-weight keys are the real label indices. Enumerating the unique
    # labels (as done previously) silently shifts the weights whenever one of
    # the classes is absent from this training split.
    labels = np.argmax(np.asarray(dummy_y), axis=1)
    present_classes = np.unique(labels)
    balanced_weights = compute_class_weight(class_weight='balanced',
                                            classes=present_classes,
                                            y=labels)

    # Keras expects a weight for every class index 0..num_class-1; classes that
    # do not occur in the training labels get a neutral weight (never applied).
    class_weight_dict = {class_idx: 1.0 for class_idx in range(num_class)}
    for class_idx, weight in zip(present_classes, balanced_weights):
        class_weight_dict[int(class_idx)] = float(weight)

    # DeepKriging model: fully connected ReLU stack ending in a softmax layer.
    model = Sequential()

    model.add(Dense(100, input_dim = phi.shape[1],kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))

    model.add(Dense(num_class, activation='softmax'))
    optimizer = keras.optimizers.Adam(learning_rate=0.0012)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    checkpoint_path = 'indicator_kriging.h5'
    callbacks = [EarlyStopping(monitor='val_accuracy', patience=200),
                 # Only the epoch with the best validation accuracy is kept on disk.
                 ModelCheckpoint(filepath=checkpoint_path,
                                 monitor='val_accuracy', save_best_only=True),
                 TqdmCallback(verbose=1)]
    print("##### End of warning messages ######")
    print('<<<<<<<<<<<<<<<< Fitting DNN-model for %4d-th simulation >>>>>>>>>>>>>>>>>'%(sim_iteration + 1))
    result = model.fit(phi, dummy_y, callbacks=callbacks, class_weight = class_weight_dict,
               validation_split = 0.1, epochs = 500, batch_size = 64, verbose = 0)

    # Discard the last-epoch weights in favour of the best checkpoint.
    model = keras.models.load_model(checkpoint_path)

    # Explicit figure handle so it can be released; this function is called once
    # per simulation and would otherwise accumulate open figures.
    fig, ax = plt.subplots()
    ax.plot(result.history['loss'], label='Train Loss')
    ax.plot(result.history['val_loss'], label='Val Loss')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title('Training vs Validation Loss')
    ax.legend()
    ax.grid(True)
    plt.show()
    plt.close(fig)
    return model

In [2]:
import time
import pandas as pd

def _build_basis(coords, num_basis):
    """Evaluate the multi-resolution radial basis functions at ``coords``.

    For every entry ``nb`` of ``num_basis`` a ``sqrt(nb) x sqrt(nb)`` grid of
    knots is placed on the unit square. Each knot contributes one column equal to
    ``(1 - d)**6 * (35*d**2 + 18*d + 3) / 3`` where ``d <= 1`` and ``0`` elsewhere,
    with ``d`` the Euclidean distance to the knot divided by ``2.5 / sqrt(nb)``.

    Parameters
    ----------
    coords : array-like, shape (n_points, 2)
        Point coordinates.
    num_basis : sequence of int
        Number of knots per resolution; each value is expected to be a perfect square.

    Returns
    -------
    numpy.ndarray, shape (n_points, sum(num_basis))
        Basis matrix, resolutions concatenated column-wise in the given order.
    """
    coords = np.asarray(coords, dtype=float)
    blocks = []
    for nb in num_basis:
        side = int(round(np.sqrt(nb)))
        grid_1d = np.linspace(0, 1, side)
        theta = 1 / np.sqrt(nb) * 2.5
        knots_s1, knots_s2 = np.meshgrid(grid_1d, grid_1d)
        knots = np.column_stack((knots_s1.flatten(), knots_s2.flatten()))
        # Scaled distance of every point to every knot: shape (n_points, nb).
        d = np.linalg.norm(coords[:, None, :] - knots[None, :, :], axis=2) / theta
        # A norm is never negative, so only the upper bound of the support is tested.
        blocks.append(np.where(d <= 1,
                               (1 - d) ** 6 * (35 * d**2 + 18 * d + 3) / 3,
                               0.0))
    return np.hstack(blocks)


def Tgh_Deepkriging(g_val, h_val):
    """Run the classification experiment over all ``num_sim`` simulated datasets.

    For each simulation the function reads the dataset for the given ``g_val`` /
    ``h_val``, splits it 90/10 into train and test (fixed ``random_state``), saves
    both splits, builds the basis-function features, trains the network through
    ``model_function``, predicts class probabilities for the test split and writes
    them next to the test rows under ``Results_DNN/``. Wall-clock times of the
    training call and of the prediction call are collected per simulation, written
    to ``DNN_time_records_g{g_val}_h{h_val}.csv`` and their means printed.

    Parameters
    ----------
    g_val, h_val
        Values interpolated into the input and output file names.

    Returns
    -------
    None
    """
    import os  # local import: the notebook's import cell does not bring in `os`

    num_basis = [5**2, 7**2, 11**2]

    # Make sure the output folders exist so the first to_csv call does not fail.
    for out_dir in ("synthetic_data_simulations/training_data",
                    "synthetic_data_simulations/testing_data",
                    "Results_DNN"):
        os.makedirs(out_dir, exist_ok=True)

    time_records = []

    for sim in range(num_sim):
        # ========== Load and split data ==========
        file_path = f"synthetic_data_simulations/Tgh_nonGaussian_1600_classification_g{g_val}_h{h_val}_{sim+1}.csv"
        df_loc = pd.read_csv(file_path)
        df_train, df_test = train_test_split(df_loc, test_size=0.1, random_state=123)
        df_train = df_train.reset_index(drop=True)
        df_test = df_test.reset_index(drop=True)

        train_file = "synthetic_data_simulations/training_data/Tgh_nonGaussian_1600_classification_g{}_h{}_{}train.csv".format(g_val, h_val, sim+1)
        test_file  = "synthetic_data_simulations/testing_data/Tgh_nonGaussian_1600_classification_g{}_h{}_{}test.csv".format(g_val, h_val, sim+1)
        df_train.to_csv(train_file, index=False)
        df_test.to_csv(test_file, index=False)

        # ========== Prepare training data ==========
        # Shift the labels down by one before one-hot encoding; df_train itself
        # is left untouched.
        labels = df_train["threshold"] - 1
        dummy_y = np_utils.to_categorical(labels)
        n = dummy_y.shape[1]
        print('Total number of classes %4d' %(n))
        N = len(df_train)
        print('Training data size %4d' %(N))
        s = np.column_stack((df_train["x"], df_train["y"]))

        # ========== Basis construction ==========
        phi = _build_basis(s, num_basis)

        # ========== Train the model ==========
        train_start = time.time()
        model = model_function(df_train, phi, dummy_y, n, sim)
        train_end = time.time()

        # ========== Construct test basis ==========
        s_test = np.column_stack((df_test["x"], df_test["y"]))
        phi_test = _build_basis(s_test, num_basis)

        # ========== Prediction ==========
        val_start = time.time()
        pred = model.predict(phi_test)
        val_end = time.time()

        # ========== Save prediction ==========
        # Both frames carry a fresh 0..n-1 index, so the column-wise concat
        # lines the predictions up with their test rows.
        pred_df = pd.DataFrame(pred)
        df_test_preds = pd.concat([df_test, pred_df], axis=1)
        out_file = f"Results_DNN/Tgh_nonGaussian_1600_classification_g{g_val}_h{h_val}_{sim+1}.csv"
        df_test_preds.to_csv(out_file, index=False)

        # ========== Record computation time ==========
        time_records.append({
            "simulation": sim + 1,
            "train_time_sec": train_end - train_start,
            "val_time_sec": val_end - val_start
        })

    # ========== Save and report average times ==========
    df_times = pd.DataFrame(time_records)
    df_times.to_csv(f"DNN_time_records_g{g_val}_h{h_val}.csv", index=False)

    print("Average training time (sec):", df_times['train_time_sec'].mean())
    print("Average validation time (sec):", df_times['val_time_sec'].mean())


In [None]:
# Run the experiment over all simulations for the g=0, h=0 datasets.
Tgh_Deepkriging(0,0)