In [None]:
pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7
Note: you may need to restart the kernel to use updated packages.


# Data Load (airfoil)

In [25]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd
  
# Download dataset id 291 (airfoil self-noise) through the ucimlrepo client.
airfoil_self_noise = fetch_ucirepo(id=291)

# The client exposes the features and the targets as separate pandas objects.
X, y = airfoil_self_noise.data.features, airfoil_self_noise.data.targets

# Stitch them together column-wise so each row carries its inputs and its target.
airfoil = pd.concat((X, y), axis="columns")

# Bare last expression: let the notebook render the combined frame.
airfoil

Unnamed: 0,frequency,attack-angle,chord-length,free-stream-velocity,suction-side-displacement-thickness,scaled-sound-pressure
0,800,0.0,0.3048,71.3,0.002663,126.201
1,1000,0.0,0.3048,71.3,0.002663,125.201
2,1250,0.0,0.3048,71.3,0.002663,125.951
3,1600,0.0,0.3048,71.3,0.002663,127.591
4,2000,0.0,0.3048,71.3,0.002663,127.461
...,...,...,...,...,...,...
1498,2500,15.6,0.1016,39.6,0.052849,110.264
1499,3150,15.6,0.1016,39.6,0.052849,109.254
1500,4000,15.6,0.1016,39.6,0.052849,106.604
1501,5000,15.6,0.1016,39.6,0.052849,106.224


In [26]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, make_scorer

# Split into a train/validation set and a held-out test set.
# 500 rows are drawn at random (fixed seed) for training + cross-validation;
# every remaining row of `airfoil` becomes the test set.
train_val_df = airfoil.sample(n=500, random_state=42)
test_df = airfoil.drop(train_val_df.index)

# Separate the predictors from the target column.
X_train_val = train_val_df.drop(columns='scaled-sound-pressure')
y_train_val = train_val_df['scaled-sound-pressure']

X_test = test_df.drop(columns='scaled-sound-pressure')
y_test = test_df['scaled-sound-pressure']


def _rmse_metric(y_true, y_pred):
    """Return the root mean squared error between `y_true` and `y_pred`.

    Computed as sqrt(MSE) instead of `mean_squared_error(..., squared=False)`:
    the `squared` keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
    whereas taking the square root explicitly works on every version.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))


# Define the RMSE scorer.
# greater_is_better=False makes the scorer return the negated RMSE, so that
# "higher score is better" still holds for model selection.
rmse = make_scorer(_rmse_metric, greater_is_better=False)


In [None]:
# Two-step pipeline: an optional preprocessing step followed by a regressor.
# Both steps are placeholders here; each grid below swaps in the estimator
# (and, where listed, the scaler) to be evaluated.
pipe = Pipeline([('preprocessing', None), ('regressor', RandomForestRegressor())])

# Hyperparameter grid: one dict per model family.
# Keys of the form '<step>__<param>' set <param> on the estimator in <step>,
# so every regressor parameter must be prefixed with 'regressor__'.
hyperparam_grid = [
    # MLPRegressor: compared with standardised and min-max scaled inputs.
    {'regressor': [MLPRegressor(max_iter=1300)],
     'preprocessing': [StandardScaler(), MinMaxScaler()],
     'regressor__hidden_layer_sizes': [(10,), (10, 10), (100,)],
     # Fixed: was 'regression__activation', which names a non-existent step
     # and makes the grid search fail with an invalid-parameter error.
     'regressor__activation': ['tanh', 'relu']},

    # SVR: with and without standardisation.
    {'regressor': [SVR()],
     'preprocessing': [StandardScaler(), None],
     'regressor__gamma': [0.1, 1, 10],
     'regressor__C': [0.1, 1, 10],
     'regressor__epsilon': [0.1, 1, 10]},

    # RandomForestRegressor: no preprocessing; varies features tried per split.
    {'regressor': [RandomForestRegressor(n_estimators=100)],
     'preprocessing': [None],
     'regressor__max_features': [1, 2, 3, 4, 5]},

    # XGBRegressor: default settings; 'preprocessing' stays at the pipeline's None.
    {'regressor': [XGBRegressor()]}
]


Unnamed: 0,frequency,attack-angle,chord-length,free-stream-velocity,suction-side-displacement-thickness,scaled-sound-pressure
0,800,0.0,0.3048,71.3,0.002663,126.201
1,1000,0.0,0.3048,71.3,0.002663,125.201
2,1250,0.0,0.3048,71.3,0.002663,125.951
3,1600,0.0,0.3048,71.3,0.002663,127.591
4,2000,0.0,0.3048,71.3,0.002663,127.461
...,...,...,...,...,...,...
1498,2500,15.6,0.1016,39.6,0.052849,110.264
1499,3150,15.6,0.1016,39.6,0.052849,109.254
1500,4000,15.6,0.1016,39.6,0.052849,106.604
1501,5000,15.6,0.1016,39.6,0.052849,106.224


In [None]:
# Grid search over every pipeline configuration in `hyperparam_grid`,
# scored with 5-fold shuffled cross-validation (fixed seed).
# Fixed: the splitter was assigned to `Kfold` but passed as `kfold` (NameError).
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
# `rmse` was built with greater_is_better=False, so the search maximises the
# negated RMSE; refit=True retrains the best configuration once the search ends.
grid = GridSearchCV(pipe, hyperparam_grid, scoring=rmse, refit=True, cv=kfold)

grid.fit(X_train_val, y_train_val)

# Print the best model found by the search.
# Fixed: a stray closing parenthesis here made the whole cell a SyntaxError.
print("Best Model:", grid.best_estimator_)

# Predict on the held-out test set and compute its RMSE.
# sqrt(MSE) is used instead of `squared=False`, which was deprecated in
# scikit-learn 1.4 and removed in 1.6.
y_test_pred = grid.predict(X_test)
test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))

print(f"\n Test Set RMSE: {test_rmse:.4f}")