# Reading dataset

In [None]:
import pandas as pd

# Load the combined concrete dataset, strip the trailing space from the
# 'fine_aggregate ' header, then shuffle every row with a fixed seed and
# renumber the index from zero.
data = (
    pd.read_csv('/kaggle/input/concretecombineddata/concrete_combined_data.csv')
    .rename(columns={'fine_aggregate ': 'fine_aggregate'})
    .sample(frac=1, random_state=69)
    .reset_index(drop=True)
)
data.head()

# Statistical & Multivariate Analysis

## Statistics Table

In [None]:
# Columns used throughout the notebook. The modelling cells take every entry
# but the last as input features and the last entry as the target.
attrs = [
    'cement',
    'blast_furnace_slag',
    'fly_ash',
    'water',
    'superplasticizer',
    'coarse_aggregate',
    'fine_aggregate',
    'age',
    'concrete_compressive_strength',
]

# Summary statistics for those columns, rounded to two decimals for display.
data[attrs].describe().round(2)

## Data Distribution histograms

In [None]:
import matplotlib.pyplot as plt

%matplotlib inline

# One histogram per column of `data`, all sharing the same styling.
hist_style = dict(
    bins=15,
    color='steelblue',
    edgecolor='black',
    linewidth=1.0,
    xlabelsize=8,
    ylabelsize=8,
    grid=False,
)
data.hist(**hist_style)

# NOTE(review): rect's right/top values of 1.5 lie outside the normalised
# figure box — presumably a trick to enlarge the grid in inline output; confirm.
plt.tight_layout(rect=(0, 0, 1.5, 1.5))

## Correlation Heatmap

In [None]:
import seaborn as sn

# Pairwise correlation of the columns in `data`, drawn as an annotated heatmap.
f, ax = plt.subplots(figsize=(10, 6))
corr = data.corr()

heatmap_style = dict(annot=True, fmt='.2f', cmap='coolwarm', linewidths=.05)
hm = sn.heatmap(corr.round(2), ax=ax, **heatmap_style)

# Leave head-room above the axes so the figure title does not overlap the plot.
f.suptitle('Data Attributes Correlation Heatmap', fontsize=14)
f.subplots_adjust(top=0.93)

## Pairwise Plots

In [None]:
# Scatter matrix for a subset of the attributes, with KDE curves on the diagonal.
cols = ['cement', 'superplasticizer', 'age', 'concrete_compressive_strength']

scatter_style = {'edgecolor': 'k', 'linewidth': 0.5}
# NOTE(review): newer seaborn releases deprecate `shade` (in favour of `fill`)
# and the grid's `.fig` attribute (in favour of `.figure`) — check the
# installed version before switching.
kde_style = {'shade': True}

pp = sn.pairplot(
    data[cols],
    diag_kind='kde',
    height=1.8,
    aspect=1.8,
    plot_kws=scatter_style,
    diag_kws=kde_style,
)

fig = pp.fig
fig.suptitle('Data Attributes Pairwise Plots', fontsize=14)
# Make room for the title and widen the horizontal gap between panels.
fig.subplots_adjust(top=0.93, wspace=0.3)

# Modelling
## Data preparation

In [None]:
import numpy as np

from sklearn.model_selection import train_test_split

# Every attribute except the last is an input feature; the last one is the target.
feature_cols, target_col = attrs[:-1], attrs[-1]
x = data[feature_cols].to_numpy()
y = data[target_col].to_numpy()

# Hold out a third of the rows for evaluation; the seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

## Training

### Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Linear baseline fitted on the training split.
reg = LinearRegression()
reg.fit(x_train, y_train);  # trailing ';' keeps the cell output empty

### 2-layered Perceptron Regressor

In [None]:
from sklearn.neural_network import MLPRegressor

# scikit-learn MLP with a fixed seed and a cap of 500 training iterations;
# every other hyper-parameter is left at the library default.
mlp = MLPRegressor(random_state=1, max_iter=500)
mlp.fit(x_train, y_train);  # trailing ';' keeps the cell output empty

### Deep Neural Network

In [None]:
from tensorflow import keras
from tensorflow.keras.layers.experimental import preprocessing

# Input normalisation layer. Its statistics are learned from the TRAINING
# split only: adapting on the full feature matrix `x` would let rows that end
# up in `x_test` influence the model and bias the evaluation.
normalizer = preprocessing.Normalization()
normalizer.adapt(x_train)

# Regression network: normalised inputs -> two 64-unit ReLU layers -> one
# linear output unit.
dnn = keras.models.Sequential([
    normalizer,
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1)
])

# Optimise mean absolute error with Adam at a 0.001 learning rate.
dnn.compile(loss='mean_absolute_error', optimizer=keras.optimizers.Adam(0.001))
dnn.summary()

In [None]:
%%time

# Train silently for 300 epochs, holding out 20% of the training rows for
# validation so both loss curves can be compared below.
history = dnn.fit(
    x_train,
    y_train,
    epochs=300,
    validation_split=0.2,
    verbose=0,
)

# Training vs validation loss per epoch.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve], label=curve)
plt.xlabel('Epoch')
plt.ylabel('Error')
plt.legend()
plt.grid(True)

## Evaluation

### R-squared scores

In [None]:
# Score of the linear model on the held-out split, to three decimals.
reg_r2 = reg.score(x_test, y_test)
round(reg_r2, 3)

In [None]:
# Score of the MLP on the held-out split, to three decimals.
mlp_r2 = mlp.score(x_test, y_test)
round(mlp_r2, 3)

In [None]:
from sklearn.metrics import r2_score

# Keras model output is flattened to 1-D so it lines up with `y_test`.
dnn_output = dnn.predict(x_test)
dnn_predicted = dnn_output.flatten()

# R-squared of the DNN on the held-out split, to three decimals.
dnn_r2 = r2_score(y_test, dnn_predicted)
round(dnn_r2, 3)

### Expected vs Predicted

In [None]:
def plot_expected_predicted(expected, predicted, lim=(0, 80)):
    """Scatter predicted values against expected ones on the current axes.

    A diagonal reference line is drawn across the plotted range; points on
    that line are predictions that match the expected value exactly.

    Parameters
    ----------
    expected : array-like
        Reference values, shown on the x axis (labelled 'Lab test').
    predicted : array-like
        Model outputs, shown on the y axis (labelled 'Predicted').
    lim : sequence of two numbers, optional
        Lower and upper bound applied to BOTH axes and to the diagonal.
        Defaults to (0, 80), the range this notebook originally hard-coded;
        widen it when values fall outside that range, otherwise those
        points are silently cut off.

    Returns
    -------
    None
        The function only draws through the pyplot state machine.
    """
    low, high = lim
    plt.scatter(expected, predicted)
    plt.xlabel('Lab test')
    plt.ylabel('Predicted')
    # Identical limits on both axes keep the diagonal at 45 degrees.
    plt.xlim(low, high)
    plt.ylim(low, high)
    plt.plot([low, high], [low, high])

expected = y_test
reg_predicted = reg.predict(x_test)
mlp_predicted = mlp.predict(x_test)

# One panel per model, side by side. The panels are created up front with
# plt.subplots instead of a full-figure plt.axes() followed by plt.subplot():
# that left a stray axes underneath the panels and never applied the equal
# aspect ratio to the panels themselves.
cmp_fig, cmp_axes = plt.subplots(1, 3, figsize=(15, 5))
cmp_fig.suptitle('Linear vs MLP vs DNN')

panels = [
    ('Linear', reg_predicted),
    ('MLP', mlp_predicted),
    ('DNN', dnn_predicted),
]
for panel_ax, (panel_title, panel_predicted) in zip(cmp_axes, panels):
    # plot_expected_predicted draws on the *current* axes, so select it first.
    plt.sca(panel_ax)
    plot_expected_predicted(expected, panel_predicted)
    panel_ax.set_aspect('equal')
    panel_ax.set_title(panel_title)

# Reserve the top strip of the figure for the suptitle.
cmp_fig.tight_layout(rect=(0, 0, 1, 0.95))

In [None]:
# Full-size view: linear regression predictions vs held-out targets.
plot_expected_predicted(expected=expected, predicted=reg_predicted)

In [None]:
# Full-size view: MLP predictions vs held-out targets.
plot_expected_predicted(expected=expected, predicted=mlp_predicted)

In [None]:
# Full-size view: DNN predictions vs held-out targets.
plot_expected_predicted(expected=expected, predicted=dnn_predicted)

# Predictions

In [None]:
# Uniform prediction interface: each entry maps a feature matrix to a 1-D
# array of predictions, whichever library backs the model.
model = dict(
    linear=lambda x: reg.predict(x),
    mlp=lambda x: mlp.predict(x),
    dnn=lambda x: dnn.predict(x).flatten(),
)

# Synthetic mixes: the second feature (attrs[1]) sweeps 0, 10, ..., 390 while
# every other feature is held constant.
# NOTE: this rebinds `data`, replacing the dataset loaded at the top of the
# notebook — restart and run from the top before re-executing earlier cells.
sweep_rows = [[410, 10 * step, 0, 155, 0, 934, 755, 28] for step in range(40)]
data = pd.DataFrame(sweep_rows, columns=attrs[:-1])
data.head()

In [None]:
# Take the feature columns explicitly rather than the whole frame: the next
# cell appends the prediction column to `data`, so re-running this cell with a
# plain data.to_numpy() would hand the model one column too many.
val = data[attrs[:-1]].to_numpy()

# DNN predictions for every row, rounded to three decimals.
res = np.round(model['dnn'](val), 3)
res

In [None]:
# Attach the predictions as a new column and write the table to disk without
# the index column.
predictions_path = 'predictions.csv'
data['concrete_compressive_strength'] = res
data.to_csv(predictions_path, index=False)

# Save Models

In [None]:
# Persist the trained network; the .h5 suffix is the file name used downstream.
model_path = 'ccs_model.h5'
dnn.save(model_path)