In [14]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Resolve the directory one level above the current working directory
# (for a notebook this is normally the folder that contains the notebook's folder).
module_path = os.path.abspath(os.pardir)

# Make the project root importable, registering it only once so that
# re-running this cell does not pile up duplicate entries.
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

import aerosandbox as asb
import aerosandbox.numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from src.Airfoil.compute_airfoil_quality import compute_airfoil_quality, QualityError

import plotly.express as px

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Extraction

In [3]:
# Locate the airfoil coordinate files bundled with AeroSandbox.
# NOTE(review): `_asb_root` is a private attribute of the package and may change
# between AeroSandbox releases — confirm it exists in the version in use.
airfoil_database_path = asb._asb_root / "geometry" / "airfoil" / "airfoil_database"

# Build one normalized Airfoil per `.dat` file, identified by the file stem.
# The glob result is sorted so the order of the database (and of every dataset
# derived from it) is deterministic instead of depending on the order in which
# the file system happens to list the directory.
airfoil_database = [
    asb.Airfoil(name=filename.stem).normalize()
    for filename in sorted(airfoil_database_path.glob("*.dat"))
]

## Preprocessing

### Quality check

In [4]:
# Keep only the airfoils that pass the quality tests.
# The passing list is built directly instead of copying the full database and
# calling `list.remove` for each failure: `remove` rescans the list every time
# and relies on Airfoil equality to find the element to drop.
quality_airfoil_database = []

for af in airfoil_database:
    try:
        compute_airfoil_quality(af, airfoil_database_path)
    except QualityError as e:
        # A failed check is reported and the airfoil is left out of the result.
        print(f"Airfoil {af.name.ljust(20)} failed quality checks: {e}")
    else:
        quality_airfoil_database.append(af)

Airfoil as6095               failed quality checks: Airfoil has negative thickness.
Airfoil fx79w470a            failed quality checks: Airfoil has abnormally large changes in angle at (0.977417, 0.0876827), 21.6 deg.
Airfoil fx79w660a            failed quality checks: Airfoil has abnormally large changes in angle at (0.948442, -0.203101), 34.7 deg.
Airfoil mh112                failed quality checks: Airfoil has abnormally high x-coordinates.


### Dataframe

### Raw coordinate counts

In [33]:
def build_dataset(airfoils_database: list):
    """Tabulate a list of airfoils into a pandas DataFrame.

    Args:
        airfoils_database: Airfoil-like objects, each exposing a ``name`` and a
            ``coordinates`` attribute (the latter must provide ``.shape``).

    Returns:
        A DataFrame with one row per airfoil and the columns:
        ``airfoil_name`` (the airfoil's name), ``coordinates`` (its coordinates
        object, stored as-is), ``shape`` (``coordinates.shape``) and ``points``
        (the first entry of ``shape``, i.e. the number of coordinate rows).
    """
    rows = [[foil.name, foil.coordinates] for foil in airfoils_database]
    frame = pd.DataFrame(rows, columns=["airfoil_name", "coordinates"])

    # Derive the per-airfoil array shape, then the point count from that shape.
    frame["shape"] = frame["coordinates"].apply(lambda xy: xy.shape)
    frame["points"] = frame["shape"].apply(lambda dims: dims[0])

    return frame

# Tabulate the airfoils that passed the quality checks (raw, not yet repaneled)
# and render the resulting frame in the notebook.
airfoil_dataset = build_dataset(quality_airfoil_database)
display(airfoil_dataset)

Unnamed: 0,airfoil_name,coordinates,shape,points
0,2032c,"[[1.0, 0.0016], [0.95, 0.0124], [0.9, 0.0229],...","(35, 2)",35
1,AV-1.7-8,"[[0.9999998992102056, 8.999538734606306e-05], ...","(111, 2)",111
2,BE5030FVNC2t,"[[0.9999989486257944, 0.0019689324948613486], ...","(140, 2)",140
3,BE5045FVNC2t,"[[0.9999992380081288, 0.0019315190145843524], ...","(100, 2)",100
4,BE5655FVNC2t,"[[0.9999990278143085, 0.002150926429073479], [...","(120, 2)",120
...,...,...,...,...
2165,ys900,"[[1.0, 0.0], [0.99898, 4e-05], [0.996, 0.00033...","(121, 2)",121
2166,ys915,"[[1.0000000000000002, -2.0822924844584812e-22]...","(121, 2)",121
2167,ys920,"[[1.0, -7.676703493157235e-20], [0.99906983985...","(121, 2)",121
2168,ys930,"[[1.0, 2.3107143604131055e-21], [0.99908001933...","(121, 2)",121


In [18]:
# Histogram of how many coordinate points each raw airfoil definition contains.
fig = px.histogram(
    data_frame=airfoil_dataset,
    x="points",
    title="Total number of points distribution",
)
fig.show()

### Coordinates standardization

In [34]:
# The number of coordinates for each airfoil is inconsistent across the database,
# so we use cubic-spline interpolation (via `repanel`) to standardize the coordinates.
# With 75 requested, every repaneled airfoil ends up with 149 coordinate rows
# (see the table rendered below).
# NOTE(review): presumably the argument is the number of points per surface
# (149 = 2 * 75 - 1) — confirm against the AeroSandbox documentation.
N_POINTS_PER_SIDE = 75

std_airfoil_database = [
    airfoil.repanel(N_POINTS_PER_SIDE) for airfoil in quality_airfoil_database
]

std_airfoil_dataset = build_dataset(std_airfoil_database)

# Standardization only succeeded if every airfoil now has the same point count.
assert std_airfoil_dataset["points"].nunique() <= 1, (
    "Repaneled airfoils have inconsistent point counts."
)
display(std_airfoil_dataset)

Unnamed: 0,airfoil_name,coordinates,shape,points
0,2032c,"[[1.0, 0.0016], [0.9995417966629326, 0.0016995...","(149, 2)",149
1,AV-1.7-8,"[[0.9999998992102056, 8.999538734606306e-05], ...","(149, 2)",149
2,BE5030FVNC2t,"[[0.9999989486257944, 0.0019689324948613486], ...","(149, 2)",149
3,BE5045FVNC2t,"[[0.9999992380081288, 0.0019315190145843524], ...","(149, 2)",149
4,BE5655FVNC2t,"[[0.9999990278143085, 0.002150926429073479], [...","(149, 2)",149
...,...,...,...,...
2165,ys900,"[[1.0, 0.0], [0.9995461494734935, 1.5311201460...","(149, 2)",149
2166,ys915,"[[1.0000000000000002, -2.0822924844584812e-22]...","(149, 2)",149
2167,ys920,"[[1.0, -7.676703493157235e-20], [0.99954694232...","(149, 2)",149
2168,ys930,"[[1.0, 2.3107143604131055e-21], [0.99954709711...","(149, 2)",149


In [37]:
# Summary statistics of the per-airfoil point counts after repaneling; a standard
# deviation of 0 with min == max means every airfoil has the same number of points.
std_airfoil_dataset["points"].describe()

count    2170.0
mean      149.0
std         0.0
min       149.0
25%       149.0
50%       149.0
75%       149.0
max       149.0
Name: points, dtype: float64

### Saving dataset

In [40]:
# Persist the standardized dataset. The target folder is created first because
# `to_pickle` does not create missing parent directories and would otherwise
# fail on a fresh checkout.
# NOTE: pickle files can execute arbitrary code when loaded — only read this
# file back from a trusted location.
output_path = r"../data/processed/airfoil_dataset.pkl"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
std_airfoil_dataset.to_pickle(output_path)