In [2]:
%load_ext autoreload
%autoreload 2

import os
import sys

# Resolve "../" to an absolute path. os.path.abspath resolves against the current
# working directory (presumably the notebook's own folder — confirm how the kernel is launched).
module_path = os.path.abspath("../")

# Append that directory to sys.path only once, so re-running this cell does not add
# duplicates. Presumably this is what lets the `src` package import below resolve.
if module_path not in sys.path:
    sys.path.append(module_path)

import aerosandbox as asb
import aerosandbox.numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from src.airfoil.compute_airfoil_quality import compute_airfoil_quality, QualityError

import plotly.express as px

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Extraction

In [None]:
# Folder holding the airfoil coordinate files (*.dat), located under the aerosandbox root.
# NOTE(review): `_asb_root` is a private aerosandbox attribute — confirm it stays available
# across the aerosandbox versions this notebook is meant to support.
airfoil_database_path = asb._asb_root / "geometry" / "airfoil" / "airfoil_database"

# glob() yields entries in filesystem-dependent order; sorting the paths keeps the
# order of the airfoil list (and of every dataset derived from it) reproducible.
airfoil_database = [
    asb.Airfoil(name=filename.stem).normalize()
    for filename in sorted(airfoil_database_path.glob("*.dat"))
]

## Preprocessing

### Quality check

In [None]:
# Keep only the airfoils that pass the quality tests.
# Passing airfoils are appended to a fresh list rather than copying the whole database
# and calling list.remove() for each failure: remove() rescans the list every time and
# depends on how Airfoil objects compare for equality.
quality_airfoil_database = []

for af in airfoil_database:
    try:
        compute_airfoil_quality(af, airfoil_database_path)
    except QualityError as e:
        # Report the rejected airfoil and the reason; it is simply not collected.
        # Tip: call af.draw() here to inspect a rejected airfoil visually.
        print(f"Airfoil {af.name.ljust(20)} failed quality checks: {e}")
    else:
        quality_airfoil_database.append(af)

### Dataframe

### Coordinates overview

In [None]:
def build_dataset(airfoils_database: list):
    """Tabulate airfoils into a DataFrame with one row per airfoil.

    Args:
        airfoils_database: Iterable of objects exposing a ``name`` attribute and a
            ``coordinates`` attribute that itself has a ``.shape``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``airfoil_name``, ``coordinates``,
        ``shape`` (the ``coordinates.shape`` of each row) and ``points`` (the first
        entry of that shape).
    """
    records = [[entry.name, entry.coordinates] for entry in airfoils_database]
    table = pd.DataFrame(records, columns=["airfoil_name", "coordinates"])

    # Bracket access is required for "shape": DataFrame.shape is already an attribute.
    table["shape"] = table["coordinates"].apply(lambda arr: arr.shape)
    table["points"] = table["shape"].apply(lambda dims: dims[0])

    return table

# Tabulate the airfoils that survived the quality checks and show the resulting table.
airfoil_dataset = build_dataset(airfoils_database=quality_airfoil_database)
display(airfoil_dataset)

In [None]:
# Histogram of the "points" column: how many coordinate points each airfoil has.
fig = px.histogram(
    data_frame=airfoil_dataset,
    x="points",
    title="Total number of points distribution",
)
fig.show()

### Coordinates standardization

In [None]:
# Airfoils in the database come with differing numbers of coordinates, so each one is
# re-paneled with the same number of points per side to standardize the coordinates
# (presumably via cubic-spline interpolation inside `repanel` — confirm against aerosandbox).
n_points_per_side = 40
std_airfoil_database = [
    airfoil.repanel(n_points_per_side)
    for airfoil in quality_airfoil_database
]

# Same tabulation as for the raw airfoils, now on the re-paneled ones.
std_airfoil_dataset = build_dataset(std_airfoil_database)
display(std_airfoil_dataset)

In [None]:
# Summary statistics of the per-airfoil point counts after re-paneling.
# NOTE(review): min and max are expected to coincide if re-paneling gave every airfoil
# the same number of points — verify in the output.
std_airfoil_dataset["points"].describe()

### Saving dataset

In [None]:
# Persist the standardized dataset as both pickle and JSON; the file name records the
# number of points per side used for re-paneling.
output_dir = "../data/processed"

# Create the target folder if needed (it may be absent on a fresh checkout); otherwise
# the writes below fail when the directory does not exist.
os.makedirs(output_dir, exist_ok=True)

std_airfoil_dataset.to_pickle(f"{output_dir}/airfoil_dataset_{n_points_per_side}.pkl")
std_airfoil_dataset.to_json(f"{output_dir}/airfoil_dataset_{n_points_per_side}.json")