<a href="https://colab.research.google.com/github/satyamraj18/Scaled-sound-pressure-level-prediction-using-Random-Forest-Regression/blob/main/Scaled_sound_pressure_level_prediction_using_Random_Forest_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing Packages

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

### Dataset Description 
#URL "https://archive.ics.uci.edu/ml/datasets/Airfoil+Self-Noise#"
#The NASA data set comprises different size NACA 0012 airfoils at various wind tunnel speeds and angles of attack. 
#The span of the airfoil and the observer position were the same in all of the experiments.
### Attribute Information 
#This problem has the following inputs:
#1. Frequency, in Hertz.
#2. Angle of attack, in degrees.
#3. Chord length, in meters.
#4. Free-stream velocity, in meters per second.
#5. Suction side displacement thickness, in meters.
#The only output is:
#6. Scaled sound pressure level, in decibels.

## Importing Datasets

In [None]:
# Load the measurements from the CSV file that sits next to the notebook.
dataset = pd.read_csv("NasaData.csv")

# Column layout used below: every column but the last is a model input,
# and the last column is the regression target.
target_col = dataset.shape[1] - 1
X = dataset.iloc[:, :target_col].values
y = dataset.iloc[:, target_col].values

## Handling Missing Data

In [None]:
from sklearn.impute import SimpleImputer

# Fill missing feature values (NaN) with the mean of their column.
# The missing-value marker must be np.nan: passing a real number such as 6.5
# would make the imputer overwrite genuine measurements equal to 6.5 with the
# column mean while leaving actual NaN entries unhandled.
# NOTE(review): the imputer is fit on all rows before the train/test split, so
# test rows contribute to the column means — consider fitting on the training
# split only.
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
X = imputer.fit_transform(X)

## Splitting the Dataset

In [None]:
from sklearn.model_selection import train_test_split

# Reserve 20% of the rows as a test set; the fixed random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    test_size=0.2,
)

## Training the Model

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 100 trees and a fixed seed, fitted on the training split.
regressor = RandomForestRegressor(
    random_state=1,
    n_estimators=100,
)
# Kept as the cell's final expression so the fitted estimator is displayed.
regressor.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=1, verbose=0, warm_start=False)

## Comparing Predicted and the Test Set Results

In [None]:
# Predict the target for the held-out rows.
y_pred = regressor.predict(X_test)

# Print two decimals per value, with predictions in the left column and the
# true test targets in the right column.
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[119.08 117.74]
 [117.33 118.12]
 [125.89 120.66]
 [124.03 122.23]
 [131.92 129.34]
 [128.82 126.59]
 [132.99 133.44]
 [129.86 131.58]
 [114.31 111.91]
 [129.37 129.97]
 [122.05 118.62]
 [126.53 126.34]
 [124.7  123.92]
 [128.69 129.  ]
 [110.05 108.69]
 [122.59 125.4 ]
 [117.51 117.78]
 [124.72 123.25]
 [132.71 132.3 ]
 [125.11 125.72]
 [133.88 135.54]
 [120.66 119.56]
 [113.93 110.45]
 [126.07 123.74]
 [127.61 127.63]
 [122.42 124.76]
 [130.72 131.72]
 [125.51 123.69]
 [128.6  129.98]
 [129.35 128.52]
 [125.47 126.54]
 [125.46 125.8 ]
 [128.26 128.25]
 [130.91 130.96]
 [125.95 126.67]
 [131.49 131.24]
 [124.84 126.54]
 [126.07 125.5 ]
 [128.66 129.09]
 [133.58 133.38]
 [125.9  124.53]
 [127.41 128.71]
 [128.39 128.81]
 [125.7  123.76]
 [129.68 130.  ]
 [125.68 121.66]
 [124.54 124.45]
 [128.08 128.2 ]
 [119.44 120.04]
 [123.73 124.3 ]
 [123.02 121.77]
 [132.44 133.04]
 [131.23 131.45]
 [122.28 119.51]
 [135.41 135.87]
 [114.73 114.04]
 [128.07 129.38]
 [122.58 121.55]
 [113.2  111.5

## R2 score

In [None]:
from sklearn.metrics import r2_score

# R^2 of the predictions against the true test targets; left as the cell's
# final expression so the score is displayed.
r2_score(y_true=y_test, y_pred=y_pred)

0.9373476372366614