# Concrete Strength Prediction

Downloaded from https://www.kaggle.com/prathamtripathi/regression-with-neural-networking

Goal: Predict the compressive strength of multiple types of concrete with a neural network.

In [None]:
import os
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error as mae

from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import EarlyStopping

# Data Wrangling

In [None]:
# Location of the input CSV. Defaults to the Kaggle dataset mount, but can be
# overridden with the CONCRETE_DATA_DIR environment variable so the notebook
# also runs outside Kaggle without editing the code.
DATA_DIR = os.environ.get('CONCRETE_DATA_DIR',
                          '/kaggle/input/regression-with-neural-networking')

df = pd.read_csv(os.path.join(DATA_DIR, 'concrete_data.csv'))
df.head()

In [None]:
# Print the column summary (dtypes and non-null counts) to check for missing data.
df.info()

The data set has no null values.

In [None]:
# Pairwise correlation of every column, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10,6))

corr_matrix = df.corr()
heatmap = sns.heatmap(corr_matrix, vmin=-1, vmax=1, annot=True, cmap='viridis', ax=ax)
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize':12}, pad=12)

plt.show()

Here we can see that Strength, our target, is most strongly correlated with Cement (type), Superplasticizer and Age.

However, neural networks are able to take all components at once and weight them appropriately, so we are not applying PCA or any similar dimensionality reduction.

On the other hand, to make training easier for the neural network, we need to normalize all the variables.

In [None]:
Target = df.Strength
Predictors = df.drop(columns=['Strength'])

# Fit the scaler on the training rows only, so that the min/max of the
# validation and test rows do not leak into the preprocessing.
# train_test_split picks rows from the sample count and random_state alone, so
# using the same settings as the first split further down (test_size=0.1,
# shuffle=True, random_state=42) selects exactly the same training rows.
# Keep these settings in sync with that split.
train_pos, _ = train_test_split(np.arange(len(Predictors)),
                                test_size=0.1, shuffle=True, random_state=42)

scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(Predictors.iloc[train_pos])

# Transform every row with the train-fitted scaler. Column names and index are
# taken from Predictors itself instead of assuming 'Strength' is the last
# column of df. Validation/test values may fall slightly outside [0, 1].
pred_norm = pd.DataFrame(scaler.transform(Predictors),
                         columns=Predictors.columns,
                         index=Predictors.index)

In [None]:
# Preview the first rows of the normalized predictors.
pred_norm.head()

In [None]:
# 90 % of the rows go to training; the remaining 10 % hold-out is then halved
# into a validation set and a test set. Both splits share shuffle and seed.
split_opts = dict(shuffle=True, random_state=42)

X_tr, X_hold, Y_tr, Y_hold = train_test_split(pred_norm, Target, test_size=0.1, **split_opts)
X_val, X_ts, Y_val, Y_ts = train_test_split(X_hold, Y_hold, test_size=0.5, **split_opts)

# Neural Network Regression model

## Architecture: Bottleneck ANN (artificial neural network)

In [None]:
# Fully connected "bottleneck" regressor: layer widths shrink from 300 to 50
# and expand back to 300 before a single linear output unit.
NNR = models.Sequential()

NNR.add(layers.Dense(300, activation='relu', input_shape = (pred_norm.shape[1],)))
NNR.add(layers.Dense(150, activation = 'relu'))
NNR.add(layers.Dense(50, activation = 'relu'))
NNR.add(layers.Dense(150, activation = 'relu'))
NNR.add(layers.Dense(300, activation = 'relu'))
NNR.add(layers.Dense(1))  # no activation: unbounded regression output

# `metrics` must be a list (or tuple/dict); a bare string is rejected by
# recent Keras versions. The 'mae' metric is kept so that the history exposes
# the 'mae' / 'val_mae' keys used by the evaluation plot.
NNR.compile(loss='mae',
            optimizer='adam',
            metrics=['mae'])
NNR.summary()

## Fitting

In [None]:
# Stop when the validation loss has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# validation loss; without it the model keeps the weights of the last epoch,
# i.e. `patience` epochs past the best one.
es = EarlyStopping(monitor = 'val_loss', mode='min', patience=5, verbose=1,
                   restore_best_weights=True)

hNNR= NNR.fit(X_tr,Y_tr,
             epochs=100,
             validation_data=(X_val,Y_val),
             callbacks=[es])

## Evaluation

In [None]:
# Training vs. validation MAE per epoch, taken from the fit history.
train_curve = hNNR.history['mae']
val_curve = hNNR.history['val_mae']
epoch_axis = np.arange(0, len(train_curve))

plt.figure(figsize=(10,6))
with plt.style.context('fivethirtyeight'):

    for curve in (train_curve, val_curve):
        sns.lineplot(x=epoch_axis, y=curve)

    plt.legend(['Train','Validation'], loc='upper right')
    plt.xlabel('epochs')
    plt.ylabel('MAE')
    # The title reports the training MAE of the last epoch that was run.
    plt.title('Evaluation\nMean Absolute Error: {:.3f}'.format(train_curve[-1]))

plt.show()

## Testing

In [None]:
# Predict on the held-out test set; the network returns one column per sample,
# so collapse it into a flat 1-D array with one value per test row.
raw_pred = NNR.predict(X_ts)
n_samples = raw_pred.shape[0]

Y_pred = np.array(raw_pred.reshape(n_samples))

In [None]:
MAE = mae(Y_ts,Y_pred)

# Work on plain float arrays so the +/- MAE arithmetic below is element-wise
# regardless of whether `mae` returns a NumPy scalar or a Python float.
y_true = np.asarray(Y_ts, dtype=float).ravel()
y_hat = np.asarray(Y_pred, dtype=float).ravel()

# Order the samples by actual strength and apply the SAME ordering to the
# predictions. Sorting the two series independently would break the pairing
# between each sample and its prediction and make the fit look better than it is.
order = np.argsort(y_true)
y_true_sorted = y_true[order]
y_hat_sorted = y_hat[order]
items = np.arange(0, len(y_hat_sorted))

plt.figure(figsize=(12,8))
with plt.style.context('fivethirtyeight'):

    plt.plot(items, y_true_sorted, label='Actual')
    plt.plot(items, y_hat_sorted, label='Predicted')
    # Shaded band: prediction +/- the test-set mean absolute error.
    plt.fill_between(x=items, y1=y_hat_sorted+MAE, y2=y_hat_sorted-MAE,
                     alpha=0.1, color='r', label='MAE')

    plt.title('Testing prediction\nMean Absolute Error = {:.3f}'.format(MAE))
    plt.ylabel('Concrete Strength')
    plt.xlabel('Item')
    plt.legend()
plt.show()

If you like it, don't forget to give it a like.
If you would change something or don't understand anything in this notebook, please leave a comment and I will reply.

I am just trying to build my first bottleneck ANN (artificial neural network) architecture; please comment if you would use a different one.

Thanks for reading!