<a href="https://colab.research.google.com/github/sunshine-sutingjun/RL-optimization-for-airfoils/blob/main/Code_for_aerodynamic_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Structure of the DNN (surrogate model that predicts the lift-drag ratio)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Define the DNN structure as per the hyperparameters provided
def build_dnn(input_shape):
    """Assemble the uncompiled feed-forward network that predicts the lift-drag ratio.

    The stack is three 128-unit SELU layers followed by one 64-unit SELU
    layer, each followed by ``Dropout(0.1)``, and a single linear output unit.

    Args:
        input_shape: Number of input features; used as the 1-D input shape
            of the first layer.

    Returns:
        The assembled ``Sequential`` model (not compiled).
    """
    network = Sequential()

    # Only the first layer carries the input shape.
    network.add(Dense(128, activation='selu', input_shape=(input_shape,)))
    network.add(Dropout(0.1))

    # Remaining hidden layers, each paired with its dropout layer.
    for units in (128, 128, 64):
        network.add(Dense(units, activation='selu'))
        network.add(Dropout(0.1))

    # The output layer has one neuron to predict the lift-drag ratio
    network.add(Dense(1))
    return network

# The network input is the CST parameter vector (14 values assumed here)
input_shape = 14  # Number of CST parameters

# Instantiate the network
dnn_model = build_dnn(input_shape)

# Configure training: mean squared error loss, Adam starting at a learning rate of 0.001
dnn_model.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=0.001),
)

# Print the layer-by-layer summary
dnn_model.summary()

# Training hyperparameters; the training procedure itself still has to be set up
training_iterations = 6000  # Training iterations
batch_size = 64  # Minibatch size

# Fitting the model on a dataset would look like the line below.
# NOTE(review): `epochs` counts full passes over the data, which may not be
# what "training iterations" means here - confirm before enabling.
# dnn_model.fit(x_train, y_train, epochs=training_iterations, batch_size=batch_size)


DDQN (Double Deep Q-Network) training

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import collections
import random

# Load your airfoil dataset
def load_airfoil_dataset(file_path):
    """Read the airfoil dataset stored at `file_path` with ``np.load``.

    Args:
        file_path: Location of the saved NumPy file.

    Returns:
        Whatever ``np.load`` yields for that file.
    """
    dataset = np.load(file_path)
    return dataset

# Define the bounds for the CST parameters
def define_bounds():
    """Return the ``(lower, upper)`` bound arrays for the CST parameters.

    NOTE: both arrays are still placeholders that contain a literal
    ``Ellipsis``; they must be replaced with real numeric bounds before the
    result can be used in arithmetic.
    """
    lower_bound = np.array([...])  # Fill in with actual lower bounds
    upper_bound = np.array([...])  # Fill in with actual upper bounds
    return (lower_bound, upper_bound)

# Deep Neural Network for CL/CD prediction
def build_dnn(input_shape, hidden_sizes=(128, 128, 128, 64), activation='relu',
              dropout_rate=0.1):
    """Build the (uncompiled) feed-forward network used for CL/CD prediction.

    Every hidden ``Dense`` layer is followed by a ``Dropout`` layer, and the
    network ends in a single linear output unit. With the default arguments
    the result is the original fixed architecture: 128-128-128-64 ReLU units
    with a dropout rate of 0.1.

    NOTE: the "Structure of DNN" cell earlier in this notebook defines the
    same topology with ``'selu'`` activations. Weights saved from that model
    load into this one without a shape error, so pass ``activation='selu'``
    when restoring them; otherwise the predictions will silently differ.

    Args:
        input_shape: Number of input features.
        hidden_sizes: Widths of the hidden layers, in order.
        activation: Activation used by every hidden layer.
        dropout_rate: Rate of the dropout layer after each hidden layer.

    Returns:
        The assembled ``Sequential`` model (not compiled).
    """
    # An explicit Input object replaces the `input_shape=` layer argument,
    # matching how create_q_network declares its input.
    layers = [tf.keras.Input(shape=(input_shape,))]
    for units in hidden_sizes:
        layers.append(Dense(units, activation=activation))
        layers.append(Dropout(dropout_rate))
    layers.append(Dense(1))
    return Sequential(layers)

def compile_dnn(dnn_model):
    """Compile `dnn_model` in place for regression training.

    Uses the Adam optimizer with a learning rate of 0.001 and the mean
    squared error loss. Nothing is returned.
    """
    adam = Adam(learning_rate=0.001)
    dnn_model.compile(loss='mean_squared_error', optimizer=adam)

# Environment and simulation
def environment(state, action, dnn_model, state_lower_bound, state_upper_bound):
    """Apply `action` to `state` and score the move with the surrogate model.

    Args:
        state: Current design parameters (array-like).
        action: Increment added to `state`; must broadcast against it.
        dnn_model: Object exposing a Keras-style ``predict(batch)`` method
            that returns one prediction per input row.
        state_lower_bound: Lower bound(s) for the parameters.
        state_upper_bound: Upper bound(s) for the parameters.

    Returns:
        A tuple ``(next_state, reward, e)`` where

        * ``next_state`` is ``state + action`` clipped into the bounds,
        * ``reward`` is a Python float equal to 2.5 times the change in the
          model's prediction from `state` to ``next_state``,
        * ``e`` is 1 if the unclipped move left the bounds, otherwise 0.
    """
    state = np.asarray(state)
    proposed = state + action

    # The violation must be detected BEFORE clipping: a clipped state is by
    # construction inside the bounds, so testing it would always yield 0.
    out_of_bounds = (np.any(proposed < state_lower_bound)
                     or np.any(proposed > state_upper_bound))
    e = int(out_of_bounds)
    next_state = np.clip(proposed, state_lower_bound, state_upper_bound)

    # Score both states with a single batched call instead of two.
    batch = np.stack([state.reshape(-1), next_state.reshape(-1)])
    predictions = np.asarray(dnn_model.predict(batch)).reshape(2, -1)[:, 0]
    current_cl_cd, next_cl_cd = predictions

    # Return a scalar so stacked rewards form a 1-D array; a shape-(1,)
    # reward would broadcast TD targets to (batch, batch) downstream.
    reward = float(2.5 * (next_cl_cd - current_cl_cd))
    return next_state, reward, e

# Replay Buffer
class ReplayBuffer:
    """Bounded store of experience tuples with uniform random sampling."""

    def __init__(self, capacity):
        """Create an empty buffer that keeps at most `capacity` experiences."""
        # A deque with `maxlen` discards its oldest item when a new one is
        # appended at full capacity.
        self.buffer = collections.deque(maxlen=capacity)

    def add(self, experience):
        """Append one experience to the buffer."""
        self.buffer.append(experience)

    def sample(self, batch_size):
        """Draw `batch_size` distinct experiences and return them column-wise.

        Each stored experience is expected to be a 5-tuple. The result is a
        tuple of five NumPy arrays, one per tuple position, in the order
        (states, actions, rewards, next_states, es).
        """
        drawn = random.sample(self.buffer, batch_size)
        state_col, action_col, reward_col, next_state_col, e_col = zip(*drawn)
        columns = (state_col, action_col, reward_col, next_state_col, e_col)
        return tuple(np.array(column) for column in columns)

# Q-Network
def create_q_network(num_states, num_actions, hidden_sizes):
    """Build a fully connected network with one linear output per action.

    Args:
        num_states: Length of the input vector.
        num_actions: Number of output units.
        hidden_sizes: Iterable of hidden-layer widths; each hidden layer
            uses ReLU activation.

    Returns:
        The uncompiled functional ``Model``.
    """
    state_input = tf.keras.Input(shape=(num_states,))

    # Chain the hidden layers, feeding each one the previous layer's output.
    hidden = state_input
    for width in hidden_sizes:
        hidden = Dense(width, activation='relu')(hidden)

    action_values = Dense(num_actions)(hidden)
    return Model(inputs=state_input, outputs=action_values)

# Training process
def _action_to_delta(action, num_states, step_size):
    """Turn a discrete action index into a parameter increment vector.

    Action ``2*i`` raises parameter ``i`` by `step_size`; action ``2*i + 1``
    lowers it by the same amount. All other parameters are left unchanged.
    """
    delta = np.zeros(num_states)
    delta[action // 2] = step_size if action % 2 == 0 else -step_size
    return delta


def train_ddqn(airfoil_dataset, state_lower_bound, state_upper_bound, dnn_model,
               max_steps=50, target_update_frequency=10, step_size=0.01):
    """Train a Double DQN agent that modifies airfoil parameters step by step.

    Each episode starts from a random row of `airfoil_dataset`. At every step
    the agent picks one of ``2 * num_states`` discrete actions (raise or lower
    a single parameter by `step_size`), the move is scored by ``environment``
    using `dnn_model`, and the transition is stored in a replay buffer from
    which minibatches are drawn to update the Q-network.

    Args:
        airfoil_dataset: 2-D array; each row is a starting state.
        state_lower_bound: Lower parameter bounds passed to ``environment``.
        state_upper_bound: Upper parameter bounds passed to ``environment``.
        dnn_model: Surrogate model passed to ``environment`` for rewards.
        max_steps: Maximum number of steps per episode. The default is a
            starting value to tune, not a validated setting.
        target_update_frequency: The target network is synchronised with the
            Q-network every this many episodes.
        step_size: Magnitude of a single parameter change. Choose it to fit
            the scale of the bounds.

    Returns:
        The trained Q-network. It is also saved to ``final_q_network.h5``.

    Raises:
        ValueError: If `max_steps` or `target_update_frequency` is below 1.
    """
    if max_steps < 1 or target_update_frequency < 1:
        raise ValueError("max_steps and target_update_frequency must be >= 1")

    # Define training parameters
    num_states = airfoil_dataset.shape[1]
    # Two actions per parameter (increase / decrease): 28 for 14 parameters.
    num_actions = 2 * num_states
    hidden_sizes = [128] * 4
    buffer_capacity = 10000
    batch_size = 64
    episodes = 1000  # Total episodes to train
    epsilon = 1.0
    epsilon_min = 0.01
    epsilon_decay = 0.995
    gamma = 0.95  # Discount factor

    # Create Q-Network and Target Network
    q_network = create_q_network(num_states, num_actions, hidden_sizes)
    target_network = create_q_network(num_states, num_actions, hidden_sizes)
    target_network.set_weights(q_network.get_weights())

    # Optimizer for the Q-network
    optimizer = Adam(learning_rate=0.001)

    # Replay buffer for experience replay
    replay_buffer = ReplayBuffer(buffer_capacity)

    # Training loop
    for episode in range(episodes):
        start_row = np.random.choice(len(airfoil_dataset))
        state = np.asarray(airfoil_dataset[start_row], dtype=float)
        total_reward = 0.0

        for _ in range(max_steps):
            # Epsilon-greedy selection. Both branches yield an integer action
            # index so the replay buffer and the one-hot mask stay consistent.
            if np.random.rand() < epsilon:
                action = int(np.random.randint(num_actions))
            else:
                state_t = tf.convert_to_tensor(state.reshape(1, -1), dtype=tf.float32)
                # Calling the model directly avoids per-step predict() overhead.
                action_values = q_network(state_t, training=False).numpy()
                action = int(np.argmax(action_values[0]))

            next_state, reward, e = environment(
                state,
                _action_to_delta(action, num_states, step_size),
                dnn_model,
                state_lower_bound,
                state_upper_bound,
            )
            # Force a scalar reward: an array-shaped reward would make the
            # sampled rewards 2-D and broadcast the targets to (batch, batch).
            reward = float(np.squeeze(reward))
            e = int(e)
            total_reward += reward
            replay_buffer.add((state, action, reward, next_state, e))

            # Experience replay
            if len(replay_buffer.buffer) >= batch_size:
                states, actions, rewards, next_states, es = replay_buffer.sample(batch_size)
                states_t = tf.convert_to_tensor(states, dtype=tf.float32)
                next_states_t = tf.convert_to_tensor(next_states, dtype=tf.float32)

                # Double DQN target: the online network chooses the next
                # action, the target network evaluates it.
                online_next_q = q_network(next_states_t, training=False).numpy()
                greedy_next = np.argmax(online_next_q, axis=1)
                target_next_q = target_network(next_states_t, training=False).numpy()
                bootstrap = target_next_q[np.arange(batch_size), greedy_next]
                targets = rewards + gamma * bootstrap * (1.0 - es)
                targets_t = tf.convert_to_tensor(targets, dtype=tf.float32)

                with tf.GradientTape() as tape:
                    q_all = q_network(states_t, training=True)
                    taken_mask = tf.one_hot(actions, num_actions)
                    q_taken = tf.reduce_sum(q_all * taken_mask, axis=1)
                    loss = tf.reduce_mean(tf.square(targets_t - q_taken))

                gradients = tape.gradient(loss, q_network.trainable_variables)
                optimizer.apply_gradients(zip(gradients, q_network.trainable_variables))

            if e == 1:
                break  # Episode ends if design requirements are not met

            state = next_state

        # Update epsilon, never dropping below the floor
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

        # Update target network weights
        if episode % target_update_frequency == 0:
            target_network.set_weights(q_network.get_weights())

        print(f"Episode {episode}: Total reward: {total_reward}")

    # Save the final trained Q-network model
    q_network.save('final_q_network.h5')
    return q_network

# Main entry point
def main():
    """Load the dataset and bounds, build the surrogate DNN and train the DDQN.

    The surrogate's input width is taken from the dataset so that it always
    matches the state width ``train_ddqn`` reads from the same array.
    """
    file_path = 'path_to_your_airfoil_dataset.npy'  # Replace with your dataset's actual path
    airfoil_dataset = load_airfoil_dataset(file_path)
    state_lower_bound, state_upper_bound = define_bounds()

    # Create and compile the DNN for CL/CD prediction. One input per dataset
    # column (14 for the CST parameterisation assumed in this notebook).
    dnn_model = build_dnn(airfoil_dataset.shape[1])
    compile_dnn(dnn_model)

    # Load trained weights if available
    # NOTE: until weights are loaded the surrogate is randomly initialised,
    # so the rewards it produces carry no aerodynamic meaning.
    # dnn_model.load_weights('path_to_trained_dnn_weights.h5')

    # Train the DDQN with the loaded dataset and DNN model
    train_ddqn(airfoil_dataset, state_lower_bound, state_upper_bound, dnn_model)


# Run the training pipeline only when executed as a script, not on import
if __name__ == '__main__':
    main()