In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import json
from sklearn.model_selection import train_test_split
from sklearn import linear_model
import csv
from keras import optimizers
import keras
from functools import partial
from math import exp
from keras.utils import get_custom_objects
from keras.layers import Activation
from keras.callbacks import ModelCheckpoint
import os
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report 
import joblib
import pickle
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_auc_score, roc_curve
from itertools import cycle

In [None]:
# Restrict TensorFlow to the first GPU (if any) and enable on-demand memory
# growth on it. Everything is guarded by `if gpus` so a CPU-only machine
# prints a message instead of raising IndexError on `gpus[0]`, and the
# RuntimeError raised by these config calls is caught and reported
# (NOTE(review): presumably the "devices already initialised" case — confirm).
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Set only the first GPU as visible
        tf.config.set_visible_devices(gpus[0], 'GPU')
        # Allow memory growth to allocate memory dynamically on the GPU
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("GPU configuration successful.")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU detected.")

In [None]:
from keras.mixed_precision import Policy
from keras.mixed_precision import set_global_policy

# Install 'mixed_float16' as the global Keras dtype policy.
# NOTE(review): this presumably affects every layer built after this cell,
# including the softmax output layer of the model defined later — confirm
# against the Keras mixed-precision guide that the output dtype is acceptable.
policy = Policy('mixed_float16')
set_global_policy(policy)

In [None]:
def load_data(data_path):
    """Read the pre-split dataset arrays stored under ``data_path``.

    The directory must contain ``X_train.npy``, ``X_test.npy``, ``X_val.npy``,
    ``y_train.npy``, ``y_test.npy`` and ``y_val.npy``. Every label array is
    returned with one extra trailing axis of length 1.

    Returns:
        Tuple ``(X_train, X_test, X_validation, y_train, y_test, y_validation)``.
    """
    split_names = ('train', 'test', 'val')

    # Features first, then labels, in the same order as the returned tuple.
    features = [np.load(f'{data_path}/X_{split}.npy') for split in split_names]
    labels = [np.load(f'{data_path}/y_{split}.npy') for split in split_names]

    # Append a trailing singleton axis to each label array.
    labels = [np.expand_dims(label_array, axis=-1) for label_array in labels]

    print("Dataset loaded!")

    return (*features, *labels)

In [None]:
def _scale_time_series(X, scaler, fit=False):
    """Apply ``scaler`` feature-wise to a 3-D (instances, time_steps, features) array.

    The array is flattened to 2-D so each feature column is scaled across all
    instances and time steps, then reshaped back to its original shape.
    ``fit=True`` fits the scaler on ``X`` first; otherwise the statistics the
    scaler already holds are reused.
    """
    num_instances, num_time_steps, num_features = X.shape
    flat = X.reshape(-1, num_features)
    flat = scaler.fit_transform(flat) if fit else scaler.transform(flat)
    return flat.reshape(num_instances, num_time_steps, num_features)


def prepare_dataset(data_path):
    """Create standardised train, validation and test sets.

    Loads the arrays with ``load_data``, standardises the features with a
    ``StandardScaler`` fitted on the training split only, saves that scaler to
    ``./scaler/scaler.pkl`` and appends a trailing channel axis to every
    feature array.

    Args:
        data_path: Directory containing the ``.npy`` files read by ``load_data``.

    Returns:
        Tuple ``(X_train, y_train, X_validation, y_validation, X_test, y_test)``.
    """
    # load dataset
    X_train, X_test, X_validation, y_train, y_test, y_validation = load_data(data_path)

    ########## Scaling the data ##########
    # Fit on the training split only. Validation and test reuse the training
    # statistics, so no information leaks from held-out data and all three
    # splits share one feature scale.
    scaler = StandardScaler()
    X_train = _scale_time_series(X_train, scaler, fit=True)
    X_validation = _scale_time_series(X_validation, scaler)
    X_test = _scale_time_series(X_test, scaler)

    # Save the (training-fitted) scaler to a file; create the folder if needed.
    os.makedirs('./scaler', exist_ok=True)
    joblib.dump(scaler, './scaler/scaler.pkl')

    # add an axis to nd array
    X_train = X_train[..., np.newaxis]
    X_test = X_test[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]

    return X_train, y_train, X_validation, y_validation, X_test, y_test

In [None]:
# Directory holding the .npy arrays read by load_data(). Set the KWS_DATA_PATH
# environment variable to point elsewhere; the original location is the default.
DATA_PATH = os.environ.get("KWS_DATA_PATH", "/home/ec.gpu/Desktop/Soumen/Dataset/kws/data_npy")
class_names = ['off', 'left', 'down', 'up', 'go', 'on', 'stop', 'unknown', 'right', 'yes']  #, 'silence' , 'no'
EPOCHS = 100          # upper bound on training epochs passed to model.fit
BATCH_SIZE = 16  #64
PATIENCE = 5          # EarlyStopping patience (epochs)
LEARNING_RATE = 0.0001  # NOTE(review): not referenced by the code in this notebook — confirm intent
SKIP = 1                # NOTE(review): not referenced by the code in this notebook — confirm intent
CLASS = len(class_names)  # width of the softmax output layer; kept in sync with class_names

In [None]:
# Build the scaled train / validation / test arrays from DATA_PATH.
X_train, y_train, X_validation, y_validation, X_test, y_test = prepare_dataset(DATA_PATH)

# Print the shape of every array, in the same order they were returned.
for _split in (X_train, y_train, X_validation, y_validation, X_test, y_test):
    print(_split.shape)


Define the model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Flatten, Dense, Dropout, LSTM , Reshape, Lambda

def create_CRNN_model(conv_layers,lstm_layers,  filters, kernel_size, fc_layers, use_bn, use_dropout):
    """Build a convolutional-recurrent classifier as a Keras ``Sequential`` model.

    The input shape is taken from the global ``X_train`` (axes 1 and 2 plus one
    channel) and the output width from the global ``CLASS``.

    Args:
        conv_layers: Number of convolutional blocks. Each block is three
            ``Conv2D`` layers, optional batch normalisation, and a 2x2 max-pool
            that is skipped once either spatial dimension is 2 or smaller.
        lstm_layers: Number of stacked ``LSTM(256, return_sequences=True)`` layers.
        filters: Number of filters in every ``Conv2D`` layer.
        kernel_size: Kernel size passed to every ``Conv2D`` layer.
        fc_layers: Iterable of integer codes choosing dense-layer widths:
            1 -> 64, 2 -> 128, 3 -> 256, 4 -> 512. Other codes add no dense layer.
        use_bn: Truthy to add ``BatchNormalization`` after each conv block.
        use_dropout: Truthy to add ``Dropout(0.5)`` after each entry of ``fc_layers``.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # Dense widths addressed by the integer codes found in `fc_layers`.
    dense_units = {4: 512, 3: 256, 2: 128, 1: 64}

    input_shape = (X_train.shape[1], X_train.shape[2], 1)
    model = Sequential()
    for block_idx in range(conv_layers):
        # Only the very first layer of the model declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if block_idx == 0 else {}
        model.add(Conv2D(filters, kernel_size=kernel_size, activation='relu', padding='same', **first_layer_kwargs))
        model.add(Conv2D(filters, kernel_size=kernel_size, activation='relu', padding='same'))
        model.add(Conv2D(filters, kernel_size=kernel_size, activation='relu', padding='same'))
        if use_bn:
            model.add(BatchNormalization())
        current_shape = model.output_shape
        if current_shape[1] > 2 and current_shape[2] > 2:
            model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))

    # Merge the last two axes so the first spatial axis becomes the LSTM's
    # sequence axis. A Reshape layer with static sizes keeps the feature
    # dimension known to the LSTM and, unlike a Lambda wrapping a Python
    # lambda, can be saved and reloaded.
    _, height, width, channels = model.output_shape
    model.add(Reshape((height, width * channels)))

    for _ in range(lstm_layers):
        model.add(LSTM(256, return_sequences=True))
    model.add(Flatten())

    for code in fc_layers:
        units = dense_units.get(code)
        if units is not None:
            model.add(Dense(units, activation='relu'))
        # Dropout is added per entry even when the code selected no dense layer.
        if use_dropout:
            model.add(Dropout(0.5))

    # Pin the output layer to float32: under a mixed_float16 global policy a
    # float16 softmax can be numerically unstable.
    model.add(Dense(CLASS, activation='softmax', dtype='float32'))
    #model.summary()
    return model

Train and Evaluate the Model

In [None]:
def train_evaluate_model(model):
    """Compile and train ``model`` on the global splits, then report its quality and size.

    Training uses the globals ``X_train``/``y_train`` with
    ``X_validation``/``y_validation`` for validation, at most ``EPOCHS`` epochs,
    batch size ``BATCH_SIZE`` and early stopping on ``val_loss`` with patience
    ``PATIENCE``.

    Args:
        model: An uncompiled Keras model.

    Returns:
        Tuple ``(accuracy, num_params)``: validation accuracy at the epoch with
        the lowest validation loss, and ``model.count_params()``.
    """
    # NOTE(review): the string 'adam' uses the optimizer's default learning
    # rate; the LEARNING_RATE constant is not applied — confirm which is intended.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=PATIENCE, restore_best_weights=True)
    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation),
                        epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[early_stopping], verbose=0)

    # Early stopping halts only after PATIENCE epochs without improvement, so
    # the last epoch is not the best one. Score the model at the epoch with the
    # lowest validation loss, which is also the epoch whose weights are restored.
    val_losses = np.asarray(history.history['val_loss'], dtype=float)
    if np.all(np.isnan(val_losses)):
        best_epoch = len(val_losses) - 1  # training diverged; fall back to the last epoch
    else:
        best_epoch = int(np.nanargmin(val_losses))
    accuracy = history.history['val_accuracy'][best_epoch]
    num_params = model.count_params()
    return accuracy, num_params


Bayesian Optimization Setup Using Gaussian Processes (GPflow)

In [None]:
import gpflow
from gpflow.utilities import print_summary
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import itertools

# Objective function to optimize
def objective_function(conv_layers,lstm_layers, filters, kernel_size, fc_layers, use_bn, use_dropout):
    """Train one CRNN configuration and return both objectives in "maximise" form.

    All discrete hyperparameters are coerced so values coming from NumPy arrays
    or a continuous optimiser work as well as plain Python ints:
    layer counts are rounded to int, ``filters`` is truncated to int with a
    floor of 8, and the two flags are rounded to bool.

    Args:
        conv_layers, lstm_layers: Number of conv blocks / LSTM layers.
        filters: Filters per ``Conv2D`` layer.
        kernel_size: Kernel size forwarded unchanged to ``create_CRNN_model``.
        fc_layers: A single dense-layer code or an iterable of codes.
        use_bn, use_dropout: Flags (0/1 or bool).

    Returns:
        ``np.array([accuracy, -num_params])``. The parameter count is negated
        so a larger value is better for both entries.
    """
    conv_layers = int(round(conv_layers))
    lstm_layers = int(round(lstm_layers))
    filters = max(int(filters), 8)
    use_bn = bool(round(use_bn))
    use_dropout = bool(round(use_dropout))
    # Ensure fc_layers is a list; np.ndim also recognises NumPy scalar types,
    # which `isinstance(..., int)` does not.
    if np.ndim(fc_layers) == 0:
        fc_layers = [fc_layers]
    model = create_CRNN_model(conv_layers,lstm_layers,  filters, kernel_size, fc_layers, use_bn, use_dropout)
    acc, params = train_evaluate_model(model)

    return np.array([acc, -params])

# Candidate values for every hyperparameter
conv_layers = [ 1, 2, 3]
lstm_layers = [ 1, 2]
filters = [16, 32, 64]
kernel_size = [(3, 3), (5, 5)]
fc_layers = [1, 2, 3]
use_bn = [1, 0]  							# 1 stands for True, 0 for False
use_dropout = [1, 0]

# Full Cartesian grid: one dict per hyperparameter combination
param_space = [
    dict(conv_layers=cl, lstm_layers=ll, filters=f, kernel_size=ks,
         fc_layers=fc, use_bn=bn, use_dropout=do)
    for cl, ll, f, ks, fc, bn, do in itertools.product(
        conv_layers, lstm_layers, filters, kernel_size, fc_layers, use_bn, use_dropout)
]

# Initial design matrix: one numeric row per configuration, with the
# kernel-size tuple spread over two columns (8 columns in total).
X_init = np.array([
    [cfg['conv_layers'], cfg['lstm_layers'], cfg['filters'], *cfg['kernel_size'],
     cfg['fc_layers'], cfg['use_bn'], cfg['use_dropout']]
    for cfg in param_space
])
# Initial objective values: every configuration in the grid is trained once.
Y_init = np.array([objective_function(**cfg) for cfg in param_space])

# Verify data shapes
print("X_init shape:", X_init.shape)
print("Y_init shape:", Y_init.shape)


Gaussian Process Models

In [None]:
# Scale the data: fit a MinMaxScaler on the initial design matrix and keep the
# scaled copy; `scaler` and `X_scaled` stay in the notebook namespace.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_init)

# Create Gaussian Process models for each objective
# (one Matern-5/2 kernel and one GPR model per column of Y_init).
kern_acc = gpflow.kernels.Matern52()
kern_size = gpflow.kernels.Matern52()

# Column 0 of Y_init is the accuracy, column 1 the negated parameter count
# returned by objective_function.
# NOTE(review): the targets are passed to the GPs unscaled; the two columns
# presumably differ by orders of magnitude — consider standardising them.
gp_acc = gpflow.models.GPR(data=(X_scaled, Y_init[:, 0:1]), kernel=kern_acc)
gp_size = gpflow.models.GPR(data=(X_scaled, Y_init[:, 1:2]), kernel=kern_size)

# Optimize GP hyperparameters by minimising each model's training loss.
gpflow.optimizers.Scipy().minimize(gp_acc.training_loss, gp_acc.trainable_variables)
gpflow.optimizers.Scipy().minimize(gp_size.training_loss, gp_size.trainable_variables)


Bayesian Optimization Loop

In [None]:
from scipy.optimize import minimize
import csv
import numpy as np
import pandas as pd

# Initialize a list to store hyperparameters, accuracy, and model size
pareto_data = []

# Function to save the collected Pareto front data to CSV
def save_pareto_data_to_csv(pareto_data, filename="DS-CNN_pareto_front.csv"):
    """Append the collected evaluation records to a CSV file.

    Args:
        pareto_data: List of dicts, one per evaluated configuration.
        filename: Target CSV path. Rows are appended if the file already exists.

    The header row is written only when the file is new or empty, so repeated
    calls produce a CSV that ``pandas.read_csv`` can parse without stray header
    lines in the data. Nothing is written when ``pareto_data`` is empty.
    """
    # Convert the list of dictionaries to a pandas DataFrame
    df = pd.DataFrame(pareto_data)
    if df.empty:
        return

    # Appending must not repeat the header in the middle of the file.
    write_header = not os.path.exists(filename) or os.path.getsize(filename) == 0

    # Save the DataFrame to a CSV file
    df.to_csv(filename, mode='a', header=write_header, index=False)

# Acquisition function
def acquisition_function(x):
    """Score a candidate point for minimisation by ``scipy.optimize.minimize``.

    Args:
        x: 1-D array holding one candidate in the scaled hyperparameter space.

    Returns:
        A Python float: the negated sum of the posterior means predicted by the
        global ``gp_acc`` and ``gp_size`` models at ``x``.

    NOTE(review): the two means are added without any weighting; if the GP
    targets have very different scales one objective will dominate — confirm.
    """
    # Reshape x to be 2D [1, D] before passing to GP models
    x_reshaped = x.reshape(1, -1)  # Shape should be [1, D]

    # Only the posterior means are used; the variances are discarded.
    mu_acc, _ = gp_acc.predict_f(x_reshaped)
    mu_size, _ = gp_size.predict_f(x_reshaped)

    # predict_f returns [1, 1] tensors; SciPy expects a plain scalar objective.
    mean_acc = float(np.asarray(mu_acc).reshape(-1)[0])
    mean_size = float(np.asarray(mu_size).reshape(-1)[0])
    return -(mean_acc + mean_size)

# Define your objective function, which evaluates the model using the given hyperparameters
def objective_function_1(hyperparameters):
    """Train the configuration described by ``hyperparameters`` and return raw metrics.

    Delegates to ``objective_function`` so both entry points apply the same
    input coercions (filter floor, flag rounding, list-wrapping of ``fc_layers``)
    instead of maintaining a second copy of the pipeline.

    Args:
        hyperparameters: Dict with the keys ``conv_layers``, ``lstm_layers``,
            ``filters``, ``kernel_size``, ``fc_layers``, ``use_bn`` and
            ``use_dropout``. Extra keys are ignored.

    Returns:
        ``np.array([accuracy, model_size])`` with ``model_size`` as the positive
        parameter count. This sign is the opposite of ``objective_function``,
        which returns the negated count.
    """
    accuracy, negated_size = objective_function(
        conv_layers=hyperparameters['conv_layers'],
        lstm_layers=hyperparameters['lstm_layers'],
        filters=hyperparameters['filters'],
        kernel_size=hyperparameters['kernel_size'],
        fc_layers=hyperparameters['fc_layers'],
        use_bn=hyperparameters['use_bn'],
        use_dropout=hyperparameters['use_dropout'],
    )
    # Undo the negation applied by objective_function to report the raw count.
    return np.array([accuracy, -negated_size])

# Function to map scaled values to the original hyperparameter values
def map_to_hyperparameters(new_x, param_space):
    """Snap a point of the scaled search space to the nearest valid configuration.

    Args:
        new_x: Array of shape ``[1, D]`` (or ``[D]``) in min-max scaled
            coordinates. Only the first row is used. Columns follow the key
            order of the ``param_space`` dicts, with tuple-valued parameters
            (e.g. ``kernel_size``) occupying one column per tuple element.
        param_space: Non-empty list of configuration dicts that all share the
            same keys.

    Returns:
        Dict mapping each parameter name to one of its values from ``param_space``.

    Raises:
        ValueError: If ``new_x`` has fewer columns than the parameters require.

    For every parameter the distinct candidate values are min-max scaled the
    same way the design matrix was, and the candidate closest to the clamped
    coordinate(s) is selected.
    """
    point = np.atleast_2d(np.asarray(new_x, dtype=float))[0]
    hyperparameters = {}

    column = 0  # running offset: tuple-valued parameters consume several columns
    for param in param_space[0].keys():
        # Distinct values in first-seen order (tuples are hashable).
        candidates = list(dict.fromkeys(cfg[param] for cfg in param_space))
        encoded = np.array([np.atleast_1d(value) for value in candidates], dtype=float)
        width = encoded.shape[1]

        coords = point[column:column + width]
        if coords.shape[0] != width:
            raise ValueError(
                f"new_x has {point.shape[0]} columns, too few to decode parameter '{param}'")

        # Min-max scale the candidates; constant columns map to 0.
        low = encoded.min(axis=0)
        span = encoded.max(axis=0) - low
        span[span == 0] = 1.0
        scaled = (encoded - low) / span

        # Clamp to [0, 1] to avoid out-of-range coordinates, then pick the
        # nearest candidate.
        target = np.clip(coords, 0.0, 1.0)
        nearest = int(np.argmin(((scaled - target) ** 2).sum(axis=1)))
        hyperparameters[param] = candidates[nearest]

        column += width

    return hyperparameters

# Bayesian optimization loop
n_iterations = 100  # Number of iterations for optimization

# Start from the initial design only, so re-running this cell does not stack
# onto observations left over from a previous run.
X_scaled = X_scaled[:len(Y_init)]
Y_scaled = Y_init.copy()

# The design matrix is min-max scaled, so the search space is the unit cube.
search_bounds = [(0.0, 1.0)] * X_scaled.shape[1]

for i in range(n_iterations):

    # Use a random point from X_scaled as the starting point
    x0 = X_scaled[np.random.choice(X_scaled.shape[0]), :].flatten()  # Flatten to make it 1D array

    # Minimize the acquisition function inside the scaled search space
    result = minimize(acquisition_function, x0, method='L-BFGS-B', bounds=search_bounds)

    # Get the new point from the optimization result
    new_x = result.x.reshape(1, -1)  # Ensure new_x is 2D

    # Map the optimized values to the real hyperparameter space
    hyperparameters = map_to_hyperparameters(new_x, param_space)
    print(f"Iteration {i+1}: Hyperparameters = {hyperparameters}")

    # Evaluate the new point using the objective function
    new_y = objective_function_1(hyperparameters).reshape(1, -1)

    # Record the configuration that was actually trained (not the continuous
    # optimiser output), encoded and scaled exactly like the rows of X_init.
    evaluated_config = np.array([[
        hyperparameters['conv_layers'],
        hyperparameters['lstm_layers'],
        hyperparameters['filters'],
        hyperparameters['kernel_size'][0],
        hyperparameters['kernel_size'][1],
        hyperparameters['fc_layers'],
        hyperparameters['use_bn'],
        hyperparameters['use_dropout'],
    ]], dtype=float)
    evaluated_x = scaler.transform(evaluated_config)

    # Append to the running observation sets. Y_init stores the model size
    # negated, while objective_function_1 returns the raw positive count, so
    # negate it here to keep one sign convention in the GP targets.
    X_scaled = np.vstack((X_scaled, evaluated_x))
    Y_scaled = np.vstack((Y_scaled, [[new_y[0, 0], -new_y[0, 1]]]))

    pareto_data.append({
        "iterations": i,
        "conv_layers": hyperparameters['conv_layers'],
        "lstm_layers": hyperparameters['lstm_layers'],
        "filters": hyperparameters['filters'],
        "kernel_size": hyperparameters['kernel_size'],
        "fc_layers": hyperparameters['fc_layers'],
        "use_bn": hyperparameters['use_bn'],
        "use_dropout": hyperparameters['use_dropout'],
        "accuracy": new_y[0, 0],  # Accuracy value
        "model_size": new_y[0, 1]  # Model size value (raw parameter count)
    })

    # Recreate and re-optimize the GP models with updated data
    gp_acc = gpflow.models.GPR(data=(X_scaled, Y_scaled[:, 0:1]), kernel=kern_acc)
    gp_size = gpflow.models.GPR(data=(X_scaled, Y_scaled[:, 1:2]), kernel=kern_size)
    gpflow.optimizers.Scipy().minimize(gp_acc.training_loss, gp_acc.trainable_variables)
    gpflow.optimizers.Scipy().minimize(gp_size.training_loss, gp_size.trainable_variables)

# Save the collected Pareto data to a CSV file after the loop finishes
save_pareto_data_to_csv(pareto_data, "pareto_front.csv")

# Plot every evaluated configuration (initial design plus BO iterations).
# Column 1 holds the negated size, so flip the sign for display.
plt.scatter(-Y_scaled[:, 1], Y_scaled[:, 0], color='red')
plt.xlabel('Model Size')
plt.ylabel('Accuracy')
plt.title('Pareto Front')
plt.show()


In [None]:
import pandas as pd

def is_dominated(candidate, front):
    """Tell whether any member of ``front`` Pareto-dominates ``candidate``.

    Every objective is treated as "larger is better". A solution dominates the
    candidate when the candidate is no better in any objective and strictly
    worse in at least one.

    Args:
        candidate: Sequence of objective values for the solution under test.
        front: Iterable of solutions, each a sequence of objective values.

    Returns:
        True if at least one solution dominates ``candidate``, otherwise False
        (including when ``front`` is empty).
    """
    def dominated_by(solution):
        pairs = list(zip(candidate, solution))
        never_better = all(mine <= theirs for mine, theirs in pairs)
        somewhere_worse = any(mine < theirs for mine, theirs in pairs)
        return never_better and somewhere_worse

    return any(dominated_by(solution) for solution in front)

def get_pareto_front_from_csv(csv_file):
    """Extract the accuracy / model-size Pareto front from a CSV of solutions.

    Accuracy is maximised and model size is minimised: a solution is kept when
    no other solution is at least as accurate and at least as small while being
    strictly better in one of the two.

    Args:
        csv_file: Path to a CSV with ``accuracy`` and ``model_size`` columns,
            where ``model_size`` is the raw (positive) parameter count.

    Returns:
        List of ``[accuracy, model_size]`` pairs, in file order.

    Rows whose objectives are not numeric (for example header lines repeated by
    appending to the same file) are skipped.
    """
    # Load the CSV file into a pandas DataFrame
    data = pd.read_csv(csv_file)

    # Keep only rows where both objectives parse as numbers.
    objectives = (
        data[['accuracy', 'model_size']]
        .apply(pd.to_numeric, errors='coerce')
        .dropna()
    )

    def as_maximisation(solution):
        # is_dominated treats every objective as "larger is better", so the
        # size is negated for the comparison only.
        return [solution[0], -solution[1]]

    pareto_front = []
    for accuracy, model_size in objectives.itertuples(index=False, name=None):
        candidate = [float(accuracy), float(model_size)]
        candidate_key = as_maximisation(candidate)

        # Update the Pareto front if the candidate is not dominated
        if is_dominated(candidate_key, [as_maximisation(s) for s in pareto_front]):
            continue

        # Remove solutions dominated by the candidate
        pareto_front = [
            s for s in pareto_front
            if not is_dominated(as_maximisation(s), [candidate_key])
        ]

        # Add the candidate as a Pareto-optimal solution
        pareto_front.append(candidate)

    return pareto_front

# Path to the CSV file containing all the solutions
csv_file = "pareto_front.csv"  # Change this to your actual file path

# Reduce the recorded solutions to the Pareto-optimal ones
pareto_front = get_pareto_front_from_csv(csv_file)

# Tabulate the front and write it to its own CSV file
pareto_front_df = pd.DataFrame.from_records(pareto_front, columns=['accuracy', 'model_size'])
pareto_front_df.to_csv('pareto_front_extracted.csv', index=False)

# Scatter plot of the extracted front: model size on x, accuracy on y
import matplotlib.pyplot as plt
plt.scatter(pareto_front_df['model_size'], pareto_front_df['accuracy'], color='red')
plt.gca().set(xlabel='Model Size', ylabel='Accuracy', title='Pareto Front')
plt.show()
