# Artificial Neural Network

### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Echo the installed TensorFlow version (last expression, so Jupyter displays it).
tf.version.VERSION

'2.17.0'

## Part 1 - Data Preprocessing

### Importing the dataset

In [3]:
# Read the whole spreadsheet into a DataFrame.
dataset = pd.read_excel('Folds5x2_pp.xlsx')

# Every column except the last one is a feature; the last column is the target.
n_feature_cols = dataset.shape[1] - 1
X = dataset.iloc[:, :n_feature_cols].values
y = dataset.iloc[:, n_feature_cols].values

In [4]:
# Quick look at the feature matrix; NumPy elides the middle rows of a large array.
print(X)

[[  14.96   41.76 1024.07   73.17]
 [  25.18   62.96 1020.04   59.08]
 [   5.11   39.4  1012.16   92.14]
 ...
 [  31.32   74.33 1012.92   36.48]
 [  24.48   69.45 1013.86   62.39]
 [  21.6    62.52 1017.23   67.87]]


### Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split itself repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### Feature Scaling
The third column has values around 1000, while the other columns have values below 100. This large difference in scale can hinder the learning process of the neural network, so feature scaling is recommended here.

In [10]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same fitted
# transformation to the test split so that no test-set statistics influence
# the preprocessing.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [12]:
# Inspect the training features as they stand after the scaling cell has run.
print(X_train)

[[-1.13572795 -0.88685592  0.67357894  0.52070558]
 [-0.80630243 -0.00971567  0.45145467  0.14531044]
 [ 1.77128416  1.84743445  0.24279248 -1.88374143]
 ...
 [-0.38409993 -1.24886277  0.84522042  0.13092486]
 [-0.9232821  -1.04155299  1.54693117  0.8830852 ]
 [ 1.70136528  1.05824381 -1.20438076 -2.42285818]]


## Part 2 - Building the ANN

### Initializing the ANN

In [6]:
# Start from an empty Sequential model; layers are appended with ann.add(...).
ann = tf.keras.Sequential()

### Adding the input layer and the first hidden layer

In [7]:
# First hidden layer: 6 units with ReLU activation. No input shape is passed
# explicitly here.
first_hidden = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(first_hidden)

### Adding the second hidden layer

In [8]:
# Second hidden layer: another 6 units with ReLU activation.
second_hidden = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(second_hidden)

### Adding the output layer

In [9]:
# Output layer: a single unit with no activation argument, which gives a
# plain linear output for the continuous regression target.
output_layer = tf.keras.layers.Dense(units=1)
ann.add(output_layer)

## Part 3 - Training the ANN

### Compiling the ANN

In [11]:
# Configure training: Adam optimizer, mean squared error as the loss.
ann.compile(
    optimizer='adam',
    loss='mean_squared_error',
)

### Training the ANN model on the Training set

In [13]:
# Train for 100 epochs in mini-batches of 32 (32 is also the default batch size).
# The call is kept as the cell's last expression, so the returned History
# object is echoed below the training log.
ann.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 206469.6875
Epoch 2/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 202282.3281
Epoch 3/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 184791.2656
Epoch 4/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 146045.2188
Epoch 5/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 95413.2188
Epoch 6/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 50929.3555
Epoch 7/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 29539.3535
Epoch 8/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 20833.9336
Epoch 9/100
[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 17110.5703
Epoch 10/100
[1m240/240[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7eb0465390f0>

### Predicting the results of the Test set

In [15]:
# Predict the target for every row of the (scaled) test features.
y_pred = ann.predict(X_test)

# Put predictions (left column) next to the true values (right column).
pred_vs_actual = np.concatenate(
    (y_pred.reshape(-1, 1), y_test.reshape(-1, 1)),
    axis=1,
)

# Apply the 2-decimal formatting to this printout only. np.set_printoptions
# would change NumPy's global print state for the rest of the kernel session.
with np.printoptions(precision=2):
    print(pred_vs_actual)

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[[431.36 431.23]
 [458.7  460.01]
 [462.89 461.14]
 ...
 [469.66 473.26]
 [442.36 438.  ]
 [461.84 463.28]]


### Calculating the mean squared error and R2 score

In [16]:
from sklearn.metrics import mean_squared_error, r2_score

# Mean squared error between the held-out targets and the predictions.
mse = mean_squared_error(y_test, y_pred)

# Coefficient of determination:
#   R^2 = 1 - sum((y_test - y_pred)**2) / sum((y_test - mean(y_test))**2)
# The denominator is the spread of the TRUE values around their mean.
r2 = r2_score(y_test, y_pred)

print("MSE:", mse)
# Rough rule of thumb (domain-dependent): ~0.9 and above is very good,
# below 0.7 is not great, below 0.4 is poor.
print("R-squared:", r2)

MSE: 19.657615556350738
R-squared: 0.9327916816691264
