# Data Processing

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Column labels for the headerless data file: five predictors followed by the target.
column_names = ['frequency', 'angle_of_attack', 'chord_length', 'free_stream_velocity',
                'suction_side_displacement_thickness', 'scaled_sound_pressure_level']
dataset = pd.read_table("data/airfoil_self_noise.dat", header=None, names=column_names)

# Every column except the last is a feature; the last column is the regression target.
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

In [3]:
# Preview the feature matrix extracted from the dataset above.
X

array([[8.00000e+02, 0.00000e+00, 3.04800e-01, 7.13000e+01, 2.66337e-03],
       [1.00000e+03, 0.00000e+00, 3.04800e-01, 7.13000e+01, 2.66337e-03],
       [1.25000e+03, 0.00000e+00, 3.04800e-01, 7.13000e+01, 2.66337e-03],
       ...,
       [4.00000e+03, 1.56000e+01, 1.01600e-01, 3.96000e+01, 5.28487e-02],
       [5.00000e+03, 1.56000e+01, 1.01600e-01, 3.96000e+01, 5.28487e-02],
       [6.30000e+03, 1.56000e+01, 1.01600e-01, 3.96000e+01, 5.28487e-02]])

In [4]:
# Preview the target vector (last column of the dataset).
y

array([126.201, 125.201, 125.951, ..., 106.604, 106.224, 104.204])

## Feature Scaling

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Standardise each feature column using statistics learned from the training rows only.
# Fitting the scaler on the full matrix would let the held-out rows influence the
# preprocessing (data leakage). The row split is reproduced here with the same
# test_size/random_state as the train/test split performed below, so it selects the
# same training rows -- keep the two calls in sync.
train_rows, _ = train_test_split(np.arange(len(X)), test_size=0.25, random_state=0)

sc = StandardScaler()
sc.fit(X[train_rows])
X = sc.transform(X)

## Splitting the dataset into the Training set and Test set

In [6]:
from sklearn.model_selection import train_test_split

# Reserve a quarter of the rows for testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.25, random_state=0)
X_train, X_test, y_train, y_test = split_parts

## Training the Polynomial Regression model

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features into polynomial terms up to degree 4.
poly_transform = PolynomialFeatures(degree=4)
poly_transform.fit(X_train)
X_poly = poly_transform.transform(X_train)

# Fit a linear regression on the expanded training features.
regressor = LinearRegression()
regressor.fit(X_poly, y_train)

LinearRegression()

## Predicting the Test set results

In [8]:
# Predict on the held-out rows, reusing the polynomial expansion fitted on the training set.
X_test_poly = poly_transform.transform(X_test)
y_pred = regressor.predict(X_test_poly)

# Print predictions (left column) next to the true targets (right column), two decimals.
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[133.45 130.79]
 [120.36 119.54]
 [111.48 110.32]
 [119.59 117.4 ]
 [126.69 127.62]
 [124.99 121.66]
 [119.81 118.69]
 [129.41 130.03]
 [113.44 114.73]
 [126.36 124.21]
 [125.87 125.84]
 [131.16 131.52]
 [130.58 130.7 ]
 [118.84 117.81]
 [124.73 126.66]
 [112.6  113.14]
 [129.43 126.76]
 [133.16 133.38]
 [119.64 118.08]
 [125.34 116.15]
 [123.07 123.46]
 [130.67 130.09]
 [129.84 129.93]
 [124.69 120.66]
 [133.06 134.06]
 [123.51 122.09]
 [115.83 117.09]
 [135.99 138.76]
 [128.54 130.83]
 [128.08 128.24]
 [129.72 128.95]
 [120.43 122.53]
 [111.44 103.38]
 [125.05 126.41]
 [130.95 131.8 ]
 [127.02 130.96]
 [116.56 119.25]
 [114.23 109.64]
 [129.1  127.78]
 [126.48 129.01]
 [130.92 132.54]
 [127.3  125.48]
 [122.95 138.27]
 [122.45 123.13]
 [108.1  111.03]
 [134.96 135.96]
 [126.61 127.12]
 [127.64 129.67]
 [126.51 125.65]
 [123.   129.24]
 [122.5  123.21]
 [124.   127.  ]
 [117.01 119.91]
 [127.26 126.56]
 [122.18 121.53]
 [123.4  122.23]
 [119.77 113.3 ]
 [127.26 128.34]
 [122.9  121.7

## Measuring performance

In [9]:
from sklearn.metrics import r2_score 
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error

In [10]:
# Coefficient of determination (R^2) of the predictions on the held-out set.
r2_score(y_true=y_test, y_pred=y_pred)

0.8253406856250833

In [11]:
# Mean squared error of the predictions on the held-out set.
mean_squared_error(y_true=y_test, y_pred=y_pred)

8.2848163579326

In [12]:
# Median absolute error of the predictions on the held-out set.
median_absolute_error(y_true=y_test, y_pred=y_pred)

1.6358139757004153