In [119]:
import pickle
import warnings

import numpy as np
import pandas as pd
from scipy.stats import truncnorm
from sklearn.dummy            import DummyClassifier
from sklearn.ensemble         import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics          import f1_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline         import Pipeline
from sklearn.preprocessing    import StandardScaler
from sklearn.svm              import SVC

warnings.filterwarnings("ignore")

In [120]:
# Number of synthetic rows to generate.
N = 20_000
# Seed NumPy's legacy global RNG so the np.random.* draws further down are repeatable.
np.random.seed(42)

def norm_p(lst):
    """Normalise a sequence of non-negative weights into probabilities.

    Parameters
    ----------
    lst : sequence of numbers
        Relative weights (for example category counts).

    Returns
    -------
    numpy.ndarray
        Float array of the same shape whose entries sum to 1. An empty
        input is returned as an empty float array.

    Raises
    ------
    ValueError
        If any weight is negative or non-finite, or if all weights are zero.
        Without this check the division would silently yield NaN or invalid
        probabilities that only fail later, inside ``np.random.choice``.
    """
    a = np.array(lst, dtype=float)
    if a.size == 0:
        # Nothing to normalise; same empty result the plain division gives.
        return a
    if not np.isfinite(a).all() or (a < 0).any():
        raise ValueError(f"weights must be finite and non-negative, got {lst!r}")
    total = a.sum()
    if total == 0:
        raise ValueError(f"weights must not all be zero, got {lst!r}")
    return a / total

def trunc_normal_vec(mn, mx, mean, sd, size):
    """Draw `size` samples from Normal(mean, sd) truncated to the interval [mn, mx].

    scipy's ``truncnorm`` takes its clip points in standard-deviation units
    relative to ``loc``, so the raw bounds are standardised first.
    """
    lower_z = (mn - mean) / sd
    upper_z = (mx - mean) / sd
    return truncnorm.rvs(lower_z, upper_z, loc=mean, scale=sd, size=size)

# The original dataset supplies the column names and their order (COLS);
# the synthetic frame built below is laid out to match it.
df_orig = pd.read_csv("aerospace_structural_design_dataset.csv")
COLS    = list(df_orig.columns)

# Categorical columns: column name -> (levels, sampling probabilities).
# The integers are relative weights that norm_p rescales to sum to 1
# (presumably level counts from the original data -- TODO confirm).
cat_spec = {
    'Material Type': (
        ['Aluminum', 'Titanium', 'Carbon Fiber'],
        norm_p([92, 99, 109]),
    ),
    'Structural Shape': (
        ['Rectangular', 'Cylindrical', 'Tapered'],
        norm_p([109, 99, 92]),
    ),
    'Load Distribution': (
        ['point load', 'distributed', 'uniform'],
        norm_p([114, 94, 92]),
    ),
    'Vibration Damping': (
        ['Low', 'Moderate', 'High'],
        norm_p([104, 109, 87]),
    ),
    'Computational Time': (
        ['Short', 'Medium', 'Long'],
        norm_p([89, 119, 92]),
    ),
    'Weight Efficiency': (
        ['Excellent', 'Good', 'Poor'],
        norm_p([94, 109, 97]),
    ),
    'Quantum Algorithm Type': (
        ['Shark Optimizer', 'Chaotic Quantum Genetic'],
        norm_p([172, 128]),
    ),
}

# One independent draw of N values per categorical column, in cat_spec order.
cat_draws = {
    column: np.random.choice(levels, size=N, p=weights)
    for column, (levels, weights) in cat_spec.items()
}

# Per-material parameters for the continuous columns.
# Layout: STATS[material][internal_key] = (min, max, mean, sd) -- the order in
# which the generation loop unpacks each tuple before passing it to
# trunc_normal_vec. The internal keys ('E', 'v', 'rho', ...) are short names
# that are mapped onto CSV columns by position through `col_indices`.
STATS = {
    'Aluminum': {
        'E':(51.81,199.83,127.16,45.09), 'v':(0.3002,0.3499,0.3265,0.0134),
        'rho':(1530.48,4424.73,3123.79,830.94), 'TS':(265.43,1490.52,840.20,371.53),
        'YM':(50.11,98.27,74.77,14.65), 'Alt':(5113.45,19962.63,12604.93,4462.26),
        'Temp':(-59.84,-40.03,-50.16,6.20), 'Pres':(100171.59,197460.25,150393.87,27746.01),
        'WS':(20.02,59.26,40.29,11.78), 'FL':(30.01,69.59,51.04,12.38),
        'Thick':(5.13,14.78,10.39,2.92), 'OptTime':(51.45,148.72,99.76,29.45),
    },
    'Titanium': {
        'E':(55.98,199.82,122.39,41.96), 'v':(0.3002,0.3498,0.3271,0.0142),
        'rho':(1520.93,4408.56,2864.28,854.06), 'TS':(261.25,1496.09,890.21,356.66),
        'YM':(50.65,99.60,73.96,13.86), 'Alt':(5085.76,19935.24,12026.51,4563.87),
        'Temp':(-59.62,-40.12,-49.38,5.70), 'Pres':(103193.68,199838.64,154348.38,28350.85),
        'WS':(20.62,59.12,38.67,11.00), 'FL':(30.38,69.95,49.55,12.04),
        'Thick':(5.12,14.94,10.32,2.85), 'OptTime':(51.90,149.83,100.30,29.24),
    },
    'Carbon Fiber': {
        'E':(50.10,199.32,123.38,43.47), 'v':(0.3006,0.3494,0.3231,0.0129),
        'rho':(1586.67,4495.76,3068.37,831.43), 'TS':(268.37,1493.05,906.10,362.69),
        'YM':(50.42,99.39,77.40,14.24), 'Alt':(5376.91,19749.58,12711.51,4611.38),
        'Temp':(-59.97,-40.04,-49.54,6.22), 'Pres':(100593.88,193557.61,144241.80,26862.31),
        'WS':(20.73,59.70,39.65,11.00), 'FL':(30.93,69.96,52.25,11.68),
        'Thick':(5.00,14.89,9.84,2.76), 'OptTime':(51.90,148.97,106.15,29.90),
    },
}

# Internal short names of the continuous columns (same key set for every material).
INTERNAL_KEYS = list(STATS['Aluminum'].keys())

mat_arr = cat_draws['Material Type']

# Every drawn material needs a STATS entry; otherwise its rows would never be
# filled in by the loop below and would end up in the CSV as garbage.
missing_materials = sorted(set(np.unique(mat_arr)) - set(STATS))
if missing_materials:
    raise ValueError(f"No STATS entry for material(s): {missing_materials}")

# NaN-initialised (rather than np.empty) so an unfilled cell can never pass
# for real data.
numeric_data = {k: np.full(N, np.nan) for k in INTERNAL_KEYS}

# Iterate over STATS itself so the material list lives in one place. Dict
# order (Aluminum, Titanium, Carbon Fiber) fixes the order of the random
# draws, which keeps the output reproducible under the seed.
for mat in STATS:
    mask = mat_arr == mat
    n    = mask.sum()
    for k in INTERNAL_KEYS:
        mn, mx, mean, sd = STATS[mat][k]
        # Fill only this material's rows with draws from its truncated normal.
        numeric_data[k][mask] = trunc_normal_vec(mn, mx, mean, sd, n)

# np.random.randint excludes `high`, so these are integers in 20..39 and
# 523..1983; both are drawn independently of material.
op_life  = np.random.randint(low=20,  high=40,   size=N)
num_iter = np.random.randint(low=523, high=1984, size=N)

# Short aliases for the continuous columns that feed the durability score.
TS, E, Thick, Temp, rho = (
    numeric_data[key] for key in ('TS', 'E', 'Thick', 'Temp', 'rho')
)

def minmax(x):
    """Rescale `x` linearly so that its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    x : numpy.ndarray or pandas.Series
        Numeric values; must be non-empty.

    Returns
    -------
    Same container type as `x`, as floats. A constant input (max == min)
    returns all zeros instead of the NaNs a 0/0 division would produce.
    """
    lo = x.min()
    span = x.max() - lo
    if span == 0:
        # x - lo is already all zeros; multiplying by 0.0 just forces float dtype.
        return (x - lo) * 0.0
    return (x - lo) / span

# Put every driver on a common 0-1 scale before weighting.
TS_n, E_n, Thick_n, Temp_n, rho_n = map(minmax, (TS, E, Thick, Temp, rho))

# Latent durability score: weighted sum of the scaled drivers. The temperature
# term subtracts most at the lowest temperatures (Temp_n == 0).
score = 2.5 * TS_n + 1.5 * E_n + 1.0 * Thick_n - 1.8 * (1 - Temp_n) + 0.8 * rho_n

# Class logits: a higher score moves probability mass from 'Low' to 'High';
# 'Medium' keeps a constant logit.
logit_low  =  2.0 - 1.4 * score
logit_med  =  np.full(N, 0.5)
logit_high = -1.8 + 1.4 * score

# Shape (N, 3), columns ordered Low / Medium / High.
logits = np.stack([logit_low, logit_med, logit_high], axis=1)

# Softmax per row; the row maximum is subtracted first for numerical stability.
exp_logits = np.exp(logits - logits.max(axis=1)[:, None])
probs      = exp_logits / exp_logits.sum(axis=1)[:, None]

# One label per row, drawn from that row's class probabilities. Kept as a
# per-row np.random.choice call: vectorising it would consume the random
# stream differently and change the generated dataset.
labels     = np.array(['Low', 'Medium', 'High'])
durability = np.array([np.random.choice(labels, p=row) for row in probs])

# Position of each continuous column within COLS, keyed by internal short name.
col_indices = {'E':1,'v':2,'rho':3,'TS':4,'YM':5,'Alt':6,
               'Temp':7,'Pres':8,'WS':10,'FL':11,'Thick':12,'OptTime':21}

# Internal short name -> actual CSV column name.
KEY_TO_COL = {k: COLS[idx] for k, idx in col_indices.items()}

# Column name -> generated values. Insertion order is irrelevant because the
# frame is re-ordered to COLS below.
rows = {COLS[0]: cat_draws['Material Type']}
rows.update({col: numeric_data[k] for k, col in KEY_TO_COL.items()})
rows.update({
    COLS[9]:  op_life,
    COLS[13]: cat_draws['Structural Shape'],
    COLS[14]: cat_draws['Load Distribution'],
    COLS[15]: cat_draws['Vibration Damping'],
    COLS[16]: cat_draws['Computational Time'],
    COLS[17]: cat_draws['Weight Efficiency'],
    COLS[18]: durability,
    COLS[19]: cat_draws['Quantum Algorithm Type'],
    COLS[20]: num_iter,
    # Same array already stored through KEY_TO_COL['OptTime']; re-assigned as before.
    COLS[21]: numeric_data['OptTime'],
})

# Selecting [COLS] restores the original column order and raises KeyError if
# any expected column was not generated.
df_synth = pd.DataFrame(rows)[COLS]
df_synth.to_csv("aerospace_structural_design_dataset_20k.csv", index=False)

In [121]:
# Reload the synthetic dataset and append the binary target plus four
# engineered ratio / interaction features in a single chained step.
df = pd.read_csv("aerospace_structural_design_dataset_20k.csv").assign(
    # Binary target: 'Low' -> 0, 'Medium' / 'High' -> 1 (any other label becomes NaN).
    Durability_bin=lambda d: d['Durability'].map({'Low': 0, 'Medium': 1, 'High': 1}),
    # Columns are addressed by their position in COLS.
    Life_to_Load=lambda d: d[COLS[9]] / d[COLS[4]],
    Strength_x_Thick=lambda d: d[COLS[4]] * d[COLS[12]],
    Stiffness_Density=lambda d: d[COLS[1]] / d[COLS[3]],
    Temp_Thickness=lambda d: d[COLS[7]] * d[COLS[12]],
)

# Categorical inputs to one-hot encode; the first level of each is dropped.
cat_features = ['Material Type', 'Structural Shape',
                'Load Distribution', 'Vibration Damping']
df_encoded   = pd.get_dummies(df, columns=cat_features, drop_first=True)

# Raw numeric columns (by position in COLS) followed by the engineered features.
numeric_features = [COLS[i] for i in (1, 3, 4, 7, 12)] + [
    'Life_to_Load',
    'Strength_x_Thick',
    'Stiffness_Density',
    'Temp_Thickness',
]

# get_dummies names its outputs '<column>_<level>', so the dummy columns are
# recovered by prefix.
dummy_prefixes  = tuple(f"{name}_" for name in cat_features)
dummy_features  = [c for c in df_encoded.columns if c.startswith(dummy_prefixes)]
FEATURE_COLS    = numeric_features + dummy_features

X = df_encoded[FEATURE_COLS]
y = df_encoded['Durability_bin']

# 75 / 25 split, stratified on the target so both parts keep the class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

# --- Candidate models --------------------------------------------------------
# Majority-class baseline: the floor every real model has to beat.
dummy = DummyClassifier(strategy='most_frequent', random_state=42)
dummy.fit(X_train, y_train)

# RBF SVM; scaling lives inside the pipeline so it is re-fitted per CV fold.
svc_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svc',    SVC(kernel='rbf', class_weight='balanced', random_state=42))
])
svc_grid = GridSearchCV(
    svc_pipe,
    param_grid={'svc__C': [0.1, 1, 10], 'svc__gamma': ['scale', 'auto', 0.01]},
    cv=3, scoring='f1', n_jobs=-1
)
svc_grid.fit(X_train, y_train)

rf = RandomForestClassifier(
    n_estimators=300, max_depth=12, min_samples_leaf=5,
    class_weight='balanced', random_state=42, n_jobs=-1
)
rf.fit(X_train, y_train)

gb_pipe = Pipeline([
    ('gb', GradientBoostingClassifier(random_state=42))
])
gb_grid = GridSearchCV(
    gb_pipe,
    param_grid={
        'gb__n_estimators' : [200, 400],
        'gb__max_depth'    : [3, 5, 7],
        'gb__learning_rate': [0.05, 0.1],
    },
    cv=3, scoring='f1', n_jobs=-1
)
gb_grid.fit(X_train, y_train)

# --- Held-out comparison -----------------------------------------------------
# Score every candidate on the untouched test split with the same metric the
# grid searches optimise (binary F1, positive class = 1), so the model that is
# persisted below can be judged against the baseline and the alternatives.
candidates = {
    'dummy (most_frequent)':         dummy,
    'svc (grid best)':               svc_grid.best_estimator_,
    'random forest':                 rf,
    'gradient boosting (grid best)': gb_grid.best_estimator_,
}
test_f1 = {
    name: f1_score(y_test, est.predict(X_test))
    for name, est in candidates.items()
}
print(pd.Series(test_f1, name='test F1').sort_values(ascending=False).round(4).to_string())

# --- Persist -----------------------------------------------------------------
# The tuned gradient-boosting pipeline is the model that gets shipped.
model = gb_grid.best_estimator_

# The pickle holds only the fitted estimator: the feature list (FEATURE_COLS)
# and the dummy encoding are not stored with it, so whoever loads it has to
# rebuild the same columns in the same order.
with open("durability.pkl", 'wb') as f:
    pickle.dump(model, f)