<a href="https://colab.research.google.com/github/sunshine-sutingjun/RL-optimization-for-airfoils/blob/main/Code_for_aerodynamic_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Structure of DNN

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Define the DNN structure as per the hyperparameters provided
def build_dnn(input_shape):
    """Build the fully connected surrogate network.

    The stack is Dense(128) -> Dense(128) -> Dense(128) -> Dense(64), each
    with SELU activation and followed by Dropout(0.1), and ends in a single
    linear output neuron.

    Args:
        input_shape: Number of scalar input features (an int); the first
            layer is declared with ``input_shape=(input_shape,)``.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # NOTE(review): SELU is usually paired with AlphaDropout and
    # lecun_normal initialisation; plain Dropout is kept here so the
    # architecture is unchanged - confirm this is intended.
    model = Sequential()

    # First hidden layer carries the input specification.
    model.add(Dense(128, activation='selu', input_shape=(input_shape,)))
    model.add(Dropout(0.1))

    # Remaining hidden layers, each followed by the same dropout rate.
    for units in (128, 128, 64):
        model.add(Dense(units, activation='selu'))
        model.add(Dropout(0.1))

    # The output layer has one neuron to predict the lift-drag ratio
    model.add(Dense(1))
    return model

# Number of CST parameters fed to the network (assumed to be 14)
input_shape = 14

# Instantiate the network for that input width
dnn_model = build_dnn(input_shape)

# Adam (initial learning rate 0.001) minimising mean squared error
dnn_model.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=1e-3),
)

# Print the layer-by-layer summary
dnn_model.summary()

# Settings for the training procedure, which still has to be set up
training_iterations = 6000  # number of training iterations
batch_size = 64  # minibatch size

# Training is left to the user; it would fit the model on the dataset, e.g.
# dnn_model.fit(x_train, y_train, epochs=training_iterations, batch_size=batch_size)


DDQN

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, optimizers
import random
import collections

def get_initial_state(dataset):
    """Return one randomly chosen row of ``dataset``.

    Args:
        dataset: 2-D array; the row index is drawn with ``np.random.choice``
            over ``dataset.shape[0]``.

    Returns:
        The selected row, ``dataset[row, :]``.
    """
    n_rows = dataset.shape[0]
    row = np.random.choice(n_rows)
    return dataset[row, :]

# Assuming an environment function that returns the next state and reward given the current state and action
def environment(state, action):
    """Placeholder for the simulation step.

    This function needs to be defined based on your simulation environment.
    As written it ignores both arguments and returns ``Ellipsis`` for the
    next state and for the reward.

    Args:
        state: Current state (unused by the placeholder).
        action: Chosen action (unused by the placeholder).

    Returns:
        A ``(next_state, reward)`` tuple; both entries are ``...`` until the
        real environment is filled in.
    """
    placeholder = ...
    return placeholder, placeholder

# Q-network architecture: ReLU hidden layers followed by a linear output with one Q-value per action
def create_q_network(num_states, num_actions, hidden_sizes):
    """Build a fully connected Q-network with the Keras functional API.

    Args:
        num_states: Length of the input state vector.
        num_actions: Number of output units (one per action).
        hidden_sizes: Iterable of hidden-layer widths; one ReLU Dense layer
            is created per entry, in order.

    Returns:
        An uncompiled ``Model`` mapping a ``(num_states,)`` input to
        ``num_actions`` linear outputs.
    """
    net_input = layers.Input(shape=(num_states,))

    # Chain the hidden layers one after another.
    hidden = net_input
    for width in hidden_sizes:
        hidden = layers.Dense(width, activation='relu')(hidden)

    # No activation on the output layer.
    q_out = layers.Dense(num_actions)(hidden)
    return Model(inputs=net_input, outputs=q_out)

# Hyperparameters
state_size = 14
# Two possible actions per element of the state vector (2 * 14 = 28)
action_size = 2 * state_size
hidden_sizes = [128] * 4  # four hidden layers of 128 units
learning_rate = 1e-3
gamma = 0.95  # discount factor
epsilon = 1.0  # exploration rate
epsilon_min = 0.01
epsilon_decay = 0.995
tau = 0.15  # target network update rate

# Main and target networks share one architecture; the target starts as an
# exact copy of the main network's weights.
main_network = create_q_network(state_size, action_size, hidden_sizes)
target_network = create_q_network(state_size, action_size, hidden_sizes)
target_network.set_weights(main_network.get_weights())

optimizer = optimizers.Adam(learning_rate=learning_rate)

# Fixed-capacity experience replay buffer (adapt to your needs)
class ReplayBuffer:
    """Bounded FIFO store of ``(state, action, reward, next_state, done)``.

    Transitions live in a ``collections.deque`` created with
    ``maxlen=max_size``, so appending to a full buffer drops the oldest entry.
    """

    def __init__(self, max_size, state_size, action_size):
        """Create an empty buffer.

        Args:
            max_size: Maximum number of transitions kept.
            state_size: Stored on the instance; not used by the methods here.
            action_size: Stored on the instance; not used by the methods here.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.buffer = collections.deque(maxlen=max_size)

    def add(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions with ``random.sample``.

        Returns:
            A 5-tuple of NumPy arrays
            ``(states, actions, rewards, next_states, dones)``, each built by
            stacking the matching field of every drawn transition.
        """
        drawn = random.sample(self.buffer, batch_size)
        # One array per transition field, in the order the fields are stored.
        return tuple(
            np.array([transition[field] for transition in drawn])
            for field in range(5)
        )

buffer = ReplayBuffer(2000, state_size, action_size)

# Episode length and target-update period. The loop below reads both names,
# but they were never defined; the values here are placeholders.
# TODO: confirm them against the intended experiment setup.
T = 100  # maximum number of environment steps per episode
C = 10  # soft-update the target network every C steps

# Double DQN training loop.
# Per step: choose an action epsilon-greedily, store the transition, and, once
# the buffer holds at least one minibatch, take a gradient step on the main
# network. The main network selects the next action and the target network
# evaluates it. `batch_size` is the minibatch size defined with the DNN
# settings earlier in this file.
for episode in range(1500):
    state = np.random.rand(state_size)  # Replace with actual initial state
    total_reward = 0

    for t in range(T):
        # Epsilon-greedy action selection
        if np.random.rand() <= epsilon:
            action = np.random.choice(action_size)
        else:
            # Call the model directly: predict() is meant for large batches
            # and adds noticeable overhead when invoked once per step.
            single_state = state.reshape(1, -1).astype(np.float32)
            action_values = main_network(single_state, training=False).numpy()
            action = np.argmax(action_values[0])

        next_state, reward = environment(state, action)
        # Condition to be replaced with actual terminal condition. Note that
        # the `...` placeholder is truthy, so until it is replaced every
        # episode stops after its first step.
        done = True if ... else False

        buffer.add(state, action, reward, next_state, done)

        # Learn only once a full minibatch is available; random.sample raises
        # ValueError when asked for more items than the buffer holds.
        if len(buffer.buffer) >= batch_size:
            states, actions, rewards, next_states, dones = buffer.sample(batch_size)
            states = states.astype(np.float32)
            next_states = next_states.astype(np.float32)

            # Double DQN target: the main network picks the greedy next
            # action, the target network supplies that action's value.
            online_next_q = main_network(next_states, training=False).numpy()
            best_next_actions = np.argmax(online_next_q, axis=1)
            target_next_q = target_network(next_states, training=False).numpy()
            chosen_next_q = target_next_q[np.arange(batch_size), best_next_actions]

            # Terminal transitions contribute only their immediate reward.
            targets = rewards + gamma * chosen_next_q * (1 - dones)
            targets = targets.astype(np.float32)

            with tf.GradientTape() as tape:
                q_values = main_network(states)
                # Select the Q value for the chosen action
                q_action = tf.reduce_sum(
                    tf.multiply(q_values, tf.one_hot(actions, action_size)), axis=1
                )
                loss = tf.keras.losses.mean_squared_error(targets, q_action)

            grads = tape.gradient(loss, main_network.trainable_variables)
            optimizer.apply_gradients(zip(grads, main_network.trainable_variables))

            # Soft-update the target network. The weights are blended tensor
            # by tensor because the per-layer arrays have different shapes
            # and cannot be packed into a single NumPy array.
            if t % C == 0:
                blended_weights = [
                    tau * main_w + (1 - tau) * target_w
                    for main_w, target_w in zip(
                        main_network.get_weights(), target_network.get_weights()
                    )
                ]
                target_network.set_weights(blended_weights)

        state = next_state
        total_reward += reward

        if done:
            break

    # Decay the exploration rate once per episode, never below epsilon_min
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    print(f"Episode: {episode+1}, Total Reward: {total_reward}")

# Make sure to save your model
main_network.save('ddqn_model.h5')
