In [2]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Build an absolute path from this notebook's parent directory
module_path = os.path.abspath("../")

# Add to sys.path if not already present
if module_path not in sys.path:
    sys.path.append(module_path)

import aerosandbox as asb
import aerosandbox.numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from src.airfoil.compute_airfoil_quality import compute_airfoil_quality, QualityError

import plotly.express as px

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Extraction

In [3]:
airfoil_database_path = asb._asb_root / "geometry" / "airfoil" / "airfoil_database"

# Directory listing order is filesystem-dependent, so sort the files
# (case-insensitively, with the exact stem as a tie-breaker) to give every run
# the same airfoil order and therefore the same dataset row order.
airfoil_files = sorted(
    airfoil_database_path.glob("*.dat"),
    key=lambda path: (path.stem.lower(), path.stem),
)

# One normalized airfoil per ".dat" file, named after the file stem.
# NOTE(review): presumably AeroSandbox resolves the name against its bundled
# database to load the coordinates -- confirm.
airfoil_database = [
    asb.Airfoil(name=airfoil_file.stem).normalize()
    for airfoil_file in airfoil_files
]

## Preprocessing

### Quality check

In [4]:
# Keep only the airfoils that pass the quality tests.
# Passing airfoils are collected into a new list instead of copying the full
# list and calling `list.remove` for every failure: `remove` rescans the list
# each time and matches by equality rather than by identity.
quality_airfoil_database = []

for af in airfoil_database:
    try:
        compute_airfoil_quality(af, airfoil_database_path)
    except QualityError as e:
        # Report the rejected airfoil together with the reason it failed.
        print(f"Airfoil {af.name.ljust(20)} failed quality checks: {e}")
    else:
        quality_airfoil_database.append(af)

Airfoil as6095               failed quality checks: Airfoil has negative thickness.
Airfoil fx79w470a            failed quality checks: Airfoil has abnormally large changes in angle at (0.977417, 0.0876827), 21.6 deg.
Airfoil fx79w660a            failed quality checks: Airfoil has abnormally large changes in angle at (0.948442, -0.203101), 34.7 deg.
Airfoil mh112                failed quality checks: Airfoil has abnormally high x-coordinates.


### Dataframe

#### Raw point counts (before standardization)

In [5]:
def build_dataset(airfoils_database: list):
    """Tabulate a list of airfoils as a pandas DataFrame.

    Args:
        airfoils_database: Objects exposing a ``name`` attribute and a
            ``coordinates`` attribute that has a ``shape``.

    Returns:
        A DataFrame with one row per airfoil and the columns
        ``airfoil_name``, ``coordinates``, ``shape`` (the ``shape`` of the
        coordinates) and ``points`` (the first entry of ``shape``).
    """
    rows = [[foil.name, foil.coordinates] for foil in airfoils_database]
    frame = pd.DataFrame(rows, columns=["airfoil_name", "coordinates"])

    # Derived columns: full coordinate shape, then its leading dimension.
    frame["shape"] = frame["coordinates"].apply(lambda xy: xy.shape)
    frame["points"] = frame["shape"].apply(lambda dims: dims[0])

    return frame

# Tabulate the airfoils that passed the quality checks and preview the table.
airfoil_dataset = build_dataset(airfoils_database=quality_airfoil_database)
display(airfoil_dataset)

Unnamed: 0,airfoil_name,coordinates,shape,points
0,2032c,"[[1.0, 0.0016], [0.95, 0.0124], [0.9, 0.0229],...","(35, 2)",35
1,a18,"[[0.9999521810074102, 0.0030692549800954484], ...","(41, 2)",41
2,a18sm,"[[1.000000015325, 0.0030649999999233747], [0.9...","(79, 2)",79
3,a63a108c,"[[1.0000122498499395, 0.0034999571255252136], ...","(99, 2)",99
4,ag03,"[[1.0000001216057246, 0.000605001790562621], [...","(180, 2)",180
...,...,...,...,...
2165,Zone-36,"[[1.00000015242192, 0.0003349965806822195], [0...","(257, 2)",257
2166,Zone-40,"[[1.000000062998738, 0.0003499964886953402], [...","(257, 2)",257
2167,Zone-46,"[[1.000000045624087, 0.0003649963443335458], [...","(257, 2)",257
2168,Zone-52,"[[0.9999999372237461, 0.00040500404031008376],...","(257, 2)",257


In [6]:
# Histogram of how many coordinate points each airfoil has before repaneling.
fig = px.histogram(
    data_frame=airfoil_dataset,
    x="points",
    title="Total number of points distribution",
)
fig.show()

### Coordinates standardization

In [7]:
# The number of coordinates per airfoil is inconsistent across the database, so
# every airfoil is repaneled (cubic-spline interpolation) to standardize the
# coordinates to the same number of points per side.
n_points_per_side = 40

std_airfoil_database = [
    quality_airfoil.repanel(n_points_per_side=n_points_per_side)
    for quality_airfoil in quality_airfoil_database
]

# Rebuild the table from the repaneled airfoils and preview it.
std_airfoil_dataset = build_dataset(airfoils_database=std_airfoil_database)
display(std_airfoil_dataset)

Unnamed: 0,airfoil_name,coordinates,shape,points
0,2032c,"[[1.0, 0.0016], [0.9983509912596553, 0.0019581...","(79, 2)",79
1,a18,"[[0.9999521810074102, 0.0030692549800954484], ...","(79, 2)",79
2,a18sm,"[[1.000000015325, 0.0030649999999233747], [0.9...","(79, 2)",79
3,a63a108c,"[[1.0000122498499395, 0.0034999571255252136], ...","(79, 2)",79
4,ag03,"[[1.0000001216057246, 0.000605001790562621], [...","(79, 2)",79
...,...,...,...,...
2165,Zone-36,"[[1.00000015242192, 0.0003349965806822195], [0...","(79, 2)",79
2166,Zone-40,"[[1.000000062998738, 0.0003499964886953402], [...","(79, 2)",79
2167,Zone-46,"[[1.000000045624087, 0.0003649963443335458], [...","(79, 2)",79
2168,Zone-52,"[[0.9999999372237461, 0.00040500404031008376],...","(79, 2)",79


In [8]:
# Summary statistics of the point count; after standardization the spread should be zero.
std_airfoil_dataset.loc[:, "points"].describe()

count    2170.0
mean       79.0
std         0.0
min        79.0
25%        79.0
50%        79.0
75%        79.0
max        79.0
Name: points, dtype: float64

### Saving dataset

In [9]:
# Make sure the output folder exists so the save also works on a fresh checkout.
output_dir = "../data/processed"
os.makedirs(output_dir, exist_ok=True)

# File names carry the per-side point count used for repaneling.
output_stem = f"{output_dir}/airfoil_dataset_{n_points_per_side}"
std_airfoil_dataset.to_pickle(f"{output_stem}.pkl")
std_airfoil_dataset.to_json(f"{output_stem}.json")