## Airfoil Aerodynamic Coefficients Prediction using ANN - Optimization of hyperparameters
## 1. Bayesian Optimization (Ax)
MLP-based surrogate model to infer aerodynamic coefficients.

This notebook uses:
 - Bayesian Optimization methods with the Optuna library.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import optuna

import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.nn import functional as F
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from AirfoilVAE import AirfoilVAE
from compute_fid import compute_fid


plt.style.use('seaborn-whitegrid')

In [2]:
# Library options
pd.options.mode.chained_assignment = None  # default='warn'
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)

# Get CPU or GPU device for NN
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
print(f"CUDA version: {torch.version.cuda}")

Using cuda device
CUDA version: 11.1


### Helper functions

In [3]:
def airfoil_plot(airfoil_coords, fig=None, label=None, spacing='cosine', n_points=30):
    """
    Returns a plot of an airfoil. Used to visualize output of the optimizer. 
    Assumes 15 points for upper surface and 15 points for lower surface, with cosine spacing.
    NOTE: This function should be edited depending on the database.
    Inputs:
        - airfoil_coords: pandas DataFrame with airfoil coordinates and other parameters
    Outputs:
        - Plot of airfoil shape
    """
    if fig==None:
        fig = plt.subplot2grid((1,3), (0,0), colspan = 3)
    
    # X coordinates
    points_per_surf = int(n_points/2)
    if spacing == 'cosine':
        x = list(reversed([0.5*(1-np.cos(ang)) for ang in np.linspace(0,np.pi,points_per_surf+2)]))
        aux_x = list([0.5*(1-np.cos(ang)) for ang in np.linspace(0,np.pi,points_per_surf+2)[1:points_per_surf+1]])
        [x.append(i) for i in aux_x]
        x.append(1)
    elif spacing == 'linear':
        x = list(reversed(list(np.linspace(0,1,points_per_surf+2))))
        aux_x = list(list(np.linspace(0,1,points_per_surf+2)[1:points_per_surf+1]))
        [x.append(i) for i in aux_x]
        x.append(1)    

    # Y coordinates
    y = []
    origin = (airfoil_coords.iloc[0][0]+airfoil_coords.iloc[0][points_per_surf])/2
    y.append(0)
    [y.append(j) for j in airfoil_coords.iloc[0][0:points_per_surf].values.tolist()]
    y.append(origin)
    #aux_y = list(reversed(airfoil_coords.iloc[points_per_surf:n_points].values.tolist()))
    aux_y = list(airfoil_coords.iloc[0][points_per_surf:n_points].values.tolist())
    [y.append(k) for k in aux_y]
    y.append(0)

    # Airfoil plot     
    fig.plot(x, y, label = label)
    fig.set_xlim([-0.1,1.1])
    fig.set_ylim([-0.2,0.3])
    fig.set_ylabel('$y/c$')
    fig.set_xlabel('$x/c$') 
    fig.set_title('Airfoil plot', fontsize=16)
    fig.legend(frameon=True)
    if fig==None:
        plt.show()
    return


def torch_test_split(X, y = None, test_size=0.2, seed=1234):
    """
    Returns a train and test set in PyTorch tensor format from a numpy array dataset.
    Inputs:
        - X: numpy array with input data. Each row is a training/testing sample and each column is a feature.
        - y: numpy array with output data. Each row is a training/testing sample and each column is an output.
        - test_size: proportion of the dataset to be used as test set.
        - seed: random seed for reproducibility.
    Outputs:
        - training_data: PyTorch tensor with training data.
        - test_data: PyTorch tensor with test data.
    """
    X_train_0, X_test_0, y_train_0, y_test_0 = train_test_split(X, y, test_size=test_size, random_state=seed)
    X_train = torch.from_numpy(X_train_0).float()
    X_test = torch.from_numpy(X_test_0).float()
    y_train = torch.from_numpy(y_train_0).float()
    y_test = torch.from_numpy(y_test_0).float()
    training_data = []
    testing_data = []
    for i in range(len(X_train)):
        training_data.append((X_train[i], y_train[i]))
    for i in range(len(X_test)):
        testing_data.append((X_test[i], y_test[i]))
    return training_data, testing_data

def normalize_data (data, scaler):
    """
    Normalizes neural network inputs and outputs.
    Inputs:
        - data: data to be normalized. [np.array / pd.DataFrame]
        - scaler: pre-fitted scaler object.
    Outputs:
        - normalized data. [pd.DataFrame]
    """
    if type(data) == pd.DataFrame:
        data = data.to_numpy().reshape(-1,scaler.n_features_in_)
    elif type(data) == np.ndarray:
        data = data.reshape(-1,scaler.n_features_in_)
    else:
        raise(TypeError('Input data must be either a pd.DataFrame or a np.ndarray'))
    norm_data = pd.DataFrame(data = scaler.transform(data), columns = scaler.feature_names_in_)
    return norm_data

def denormalize_data (data, scaler):
    """
    Denormalizes neural network inputs and outputs.
    Inputs:
        - data: data to be denormalized. [np.array / pd.DataFrame]
        - scaler: pre-fitted scaler object.
    Outputs:
        - denormalized data. [pd.DataFrame]
    """
    if type(data) == pd.DataFrame:
        data = data.to_numpy().reshape(-1,scaler.n_features_in_)
    elif type(data) == np.ndarray:
        data = data.reshape(-1,scaler.n_features_in_)
    else:
        raise(TypeError('Input data must be either a pd.DataFrame or a np.ndarray'))
    denorm_data = pd.DataFrame(data = scaler.inverse_transform(data), columns = scaler.feature_names_in_)
    return denorm_data

def init_weights(m):
    """
    Resets all the weights in a given model. Uses a normal distribution with varying standard deviation for each weight depending on the layer type.
    To be applied to a PyTorch model object as model.apply(init_weights).
    Obtained from: https://stackoverflow.com/questions/64699434/reset-model-parameters-and-weights-of-a-network-pytorch-for-cross-validation
    """
    if isinstance(m, nn.Embedding):
        nn.init.normal_(m.weight, mean=0.0, std=0.1) ## or simply use your layer.reset_parameters()
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0.0, std=np.sqrt(1 / m.in_features))
        if m.bias is not None: 
            nn.init.zeros_(m.bias)
    if isinstance(m, nn.Conv1d):
            nn.init.normal_(m.weight, mean=0.0, std=np.sqrt(4 / m.in_channels))
            if m.bias is not None: 
                nn.init.zeros_(m.bias)


## 1. Import data and optimize the model architecture
A simple VAE will be taken as a baseline and its architecture (number of layers and units per hidden layer) will be optimized.

### 1.1 Load data

In [4]:
# Define input dataset (.csv) name and path
data_folder = './data/'
dataset_name = 'NACA4Digit_Dataset15Point.csv'
dataset_name = 'UIUC_dataset_198p.csv'

# Import dataset
airfoil_df = pd.read_csv(data_folder + dataset_name)
try:
    airfoil_df = airfoil_df.drop('Unnamed: 0', axis=1)    # Remove first column, counter
except: pass
num_coords = int(sum([1 for col in airfoil_df if col.startswith('y')]))
print(f'Number of airfoil coordinates: {num_coords}')
airfoil_df = airfoil_df.drop_duplicates(subset=['yU_1'], keep='first')    # Remove duplicate airfoil coordinates

airfoil_df.head()

Number of airfoil coordinates: 198


Unnamed: 0,yU_1,yU_2,yU_3,yU_4,yU_5,yU_6,yU_7,yU_8,yU_9,yU_10,...,yL_90,yL_91,yL_92,yL_93,yL_94,yL_95,yL_96,yL_97,yL_98,yL_99
0,5.2e-05,0.000207,0.000466,0.000828,0.001293,0.00186,0.002528,0.003298,0.004166,0.005133,...,0.002838,0.002308,0.00183,0.001406,0.001036,0.000721,0.000462,0.000261,0.000116,2.9e-05
1,5.9e-05,0.000235,0.000529,0.000938,0.001461,0.002096,0.00284,0.003691,0.004645,0.005697,...,0.001694,0.00138,0.001096,0.000843,0.000622,0.000433,0.000278,0.000157,7e-05,1.7e-05
2,5.1e-05,0.000204,0.000462,0.000829,0.001308,0.001905,0.002625,0.00347,0.004443,0.005543,...,0.001289,0.00106,0.000852,0.000664,0.000497,0.000351,0.000228,0.00013,5.8e-05,1.5e-05
3,2.6e-05,0.000105,0.000236,0.00042,0.000656,0.000943,0.001282,0.001669,0.002104,0.002581,...,-0.000372,-0.000303,-0.00024,-0.000185,-0.000137,-9.6e-05,-6.2e-05,-3.5e-05,-1.6e-05,-4e-06
4,2.4e-05,9.5e-05,0.000213,0.00038,0.000593,0.000854,0.001162,0.001514,0.00191,0.002347,...,-0.000223,-0.000178,-0.000138,-0.000103,-7.3e-05,-5e-05,-3.1e-05,-1.7e-05,-7e-06,-2e-06


In [5]:
# Data scaler fitting
scaler = MinMaxScaler()
scaler.fit(airfoil_df)

# Assemble a DataFrame with all the minimum and maximum values of each column
# For normalization and de-normalization. Gives an idea of the bounds.
scaler_bounds = pd.DataFrame(data = np.stack([scaler.feature_names_in_, scaler.data_min_, scaler.data_max_], axis=1), columns=['property', 'min', 'max'])

# Data normalization
airfoil_df_norm = normalize_data(airfoil_df, scaler)

# Input and "output" features
# Input and output features are both the same for this dataset.
try:
    X = airfoil_df_norm.drop(['Cl', 'Cd', 'Cm', 'ReynoldsNumber', 'MachNumber', 'alpha'], axis=1).values
except: 
    X = airfoil_df_norm.values

# Data tensors
training_data, test_data = torch_test_split(X, X, test_size=0.2)

# Data loaders
batch_size = 32

train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape} {y.dtype}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([32, 198]) torch.float32
Shape of y: torch.Size([32, 198]) torch.float32




### 1.2 Architectural search
First, we will look for the most optimal architecture for our VAE network.

In [6]:
# Helper functions
def generate_VAE(trial):
    """
    Assemble a Variational Autoencoder with the parameters defined in the trial object.
    Inputs:
        - trial: trial object from Optuna. It generates the parameters for the network.
    Outputs:
        - model: VAE model with an architecture suggested by the trial object.
    """

    # --- VAE parameters ---
    device = 'cuda'
    latent_dims = 4
    in_channels = 198

    model = AirfoilVAE(in_channels = in_channels,
                       latent_dim = latent_dims,
                       hidden_dims = hidden_dims).to(device)
    hidden_dims = []
    n_layers = trial.suggest_int('n_layers', 2, 3)
    
    for i in range(n_layers):
        out_features = trial.suggest_int(f'n_units_l{i}', 128, 1024)
        hidden_dims.append(out_features)

    model = AirfoilVAE(in_channels = in_channels,
                       latent_dim = latent_dims,
                       hidden_dims = hidden_dims).to(device)
    print(f"Model architecture: {hidden_dims}")
    return model

def train(dataloader, model, loss_fn, optimizer, epochs = 1):
    """
    Train the input model on the input dataloader.
    Inputs:
        - Dataloader: a PyTorch DataLoader object.
        - model: a PyTorch neural network model object.
        - loss_fn: a PyTorch loss function object.
        - optimizer: a PyTorch optimizer object.
        - [Optional, int] epochs: number of epochs to train the model.
    """
    for t in range(epochs):
        kld_weight_coef = 5e-6
        kld_weight = kld_weight_coef*(t/20 if t/20 <= 1 else 1)
        model.train()
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)

            # Compute prediction error
            pred = model(X)
            loss = loss_fn(pred, weight = kld_weight)

            # Backpropagation
            optimizer.zero_grad()
            loss['loss'].backward()
            optimizer.step()
            
def test(model): 
    """
    Calculate the input model FID with respect to the training dataset.
    Inputs:
        - model: a PyTorch neural network model object.
    Outputs:
        - fid: model FID with respect to the training dataset.
    """
    sampled_norm = model.sample(10000, 'cpu', std_coef = 1)
    sampled = denormalize_data(sampled_norm.detach().cpu().numpy(),scaler)
    fid = compute_fid(airfoil_df.to_numpy(), sampled)
    
    print(f"Model FID: {fid} \n")
    return fid

In [7]:
# Objective function
def objective(trial):
    """
    Objective function. Takes a trial object that generates a MLP model architecture and outputs the test (validation) set RMSE.
    """
    model = generate_VAE(trial)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    train(train_dataloader, model, model.loss_function, optimizer, epochs = 5000)
    fid = test(model)
    return fid

In [9]:
# Create a study object
study = optuna.create_study(direction='minimize', pruner=optuna.pruners.HyperbandPruner())

print(f"Sampler is {study.sampler.__class__.__name__}")
print(f"Pruner is {study.pruner.__class__.__name__}")

[32m[I 2022-05-22 21:20:46,504][0m A new study created in memory with name: no-name-ee389aad-90af-494a-868b-d2cee78194f8[0m


Sampler is TPESampler
Pruner is HyperbandPruner


In [None]:
# Optimize the RMSE of the model
study.optimize(objective, n_trials=100)