In [1]:
#read dat file
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sb

columns = ['frequency', 'angle_of_attack', 'chord_length', 'free_stream_velocity', 'suction_side_displacement_thickness',
           'scaled_sound_pressure_level']

#load data from .dat file
airfoil_dataset = pd.read_csv('airfoil_self_noise.dat', sep='\t', header=None, names=columns)

In [2]:
#partition to data and target

data = airfoil_dataset.iloc[1:,0:-1]
target = airfoil_dataset.iloc[1:,-1]

data.head()

#sb.pairplot(data, diag_kind="kde")

Unnamed: 0,frequency,angle_of_attack,chord_length,free_stream_velocity,suction_side_displacement_thickness
1,1000,0.0,0.3048,71.3,0.002663
2,1250,0.0,0.3048,71.3,0.002663
3,1600,0.0,0.3048,71.3,0.002663
4,2000,0.0,0.3048,71.3,0.002663
5,2500,0.0,0.3048,71.3,0.002663


In [None]:
# #standardization
# from sklearn.preprocessing import StandardScaler

# scaler = StandardScaler()
# scaled_data = scaler.fit_transform(data)

# scaled_data_pd = pd.DataFrame(scaled_data, columns = ['frequency', 'angle_of_attack', 'chord_length', 'free_stream_velocity', 
# 'suction_side_displacement_thickness'])

# scaled_data_pd.describe()

In [3]:
#partition to train and test
from sklearn.model_selection import train_test_split

train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=0.1, random_state=10)

print("Training dataset:")
print("train_data:", train_data.shape)
print("train_target:", train_target.shape)

print("Testing dataset:")
print("test_data:", test_data.shape)
print("test_target:", test_target.shape)


Training dataset:
train_data: (1351, 5)
train_target: (1351,)
Testing dataset:
test_data: (151, 5)
test_target: (151,)


In [4]:
#normalization
from sklearn.preprocessing import MinMaxScaler

# fit scaler on training data
norm = MinMaxScaler().fit(train_data)

# transform training data
train_data_norm = norm.transform(train_data)

# transform testing data
test_data_norm = norm.transform(test_data)

normalized_data_pd = pd.DataFrame(train_data_norm, columns = ['frequency', 'angle_of_attack', 'chord_length', 'free_stream_velocity', 
'suction_side_displacement_thickness'])

normalized_data_pd.head()


Unnamed: 0,frequency,angle_of_attack,chord_length,free_stream_velocity,suction_side_displacement_thickness
0,0.090909,0.067568,1.0,1.0,0.051139
1,0.14899,0.0,0.454545,0.199495,0.026412
2,0.14899,0.243243,0.454545,0.0,0.083675
3,0.005808,0.400901,0.272727,1.0,0.170798
4,0.030303,0.445946,0.454545,0.199495,0.395309


In [5]:
#Linear Regression

from sklearn.linear_model import LinearRegression

linear_regression = LinearRegression()
linear_regression.fit(train_data_norm, train_target)



LinearRegression()

In [6]:
#Neural Network

from sklearn.neural_network import MLPRegressor

neural_network = MLPRegressor(hidden_layer_sizes=(100,50,25), max_iter = 1000, activation = 'relu', solver = 'adam')

neural_network.fit(train_data_norm, train_target)


MLPRegressor(hidden_layer_sizes=(100, 50, 25), max_iter=1000)

In [7]:
#model evaluation
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score


print("Mean squared error of a learned linear regression model: %.2f" % 
      mean_squared_error(test_target, linear_regression.predict(test_data_norm)))
print("Mean squared error of a learned neural network model: %.2f" % 
      mean_squared_error(test_target, neural_network.predict(test_data_norm)))
print("")

print('Variance score: %.2f' % r2_score(test_target, linear_regression.predict(test_data_norm)))
print('Variance score for neural network model: %.2f' % r2_score(test_target, neural_network.predict(test_data_norm)))
print("")

scores = cross_val_score(LinearRegression(), data, target, cv=5)
print("Cross validation linear:")
print(scores)
print("")

scores = cross_val_score(MLPRegressor(), data, target, cv=5)
print("Cross validation neural model:")
print(scores)


Mean squared error of a learned linear regression model: 20.36
Mean squared error of a learned neural network model: 7.45

Variance score: 0.54
Variance score for neural network model: 0.83

Cross validation linear:
[ 0.70551145  0.52884577  0.58908692 -0.31095165  0.53688916]

Cross validation neural model:
[-46.2891708  -29.0494926  -17.82747136 -27.86167799 -23.99597354]


In [13]:
id=18

linear_regression_prediction = linear_regression.predict(test_data_norm[id,:].reshape(1,-1))
neural_network_prediction = neural_network.predict(test_data_norm[id,:].reshape(1,-1))

print("Linear regression model predicted for id {0} value {1}".format(id, linear_regression_prediction))
print("Neural network model predicted for id {0} value {1}".format(id, neural_network_prediction))
print("")

print("Real value for patient \"{0}\" is {1}".format(id, 126.966))

Linear regression model predicted for id 18 value [118.4883164]
Neural network model predicted for id 18 value [122.02967754]

Real value for patient "18" is 126.966
