<a href="https://colab.research.google.com/github/rickzferry/awesome-chatgpt-prompts/blob/main/Regression_with_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Creating a regression model in Keras without normalized data**




#Install and Importations

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error


## Download and Evaluate Dataset


In [None]:
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [None]:
concrete_data.shape

(1030, 9)

In [None]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [None]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

# Predictors and target separation.

Predictors contain all characteristics or independent variables of the data set, except the variable we are interested in predicting (Strength). Target contains only the variable we want to predict (Strength). This separation is essential for training a machine learning model, where predictors are used to learn patterns that help predict the target variable.

In [None]:
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [None]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [None]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

# Train and test split

In [None]:
from sklearn.model_selection import train_test_split

# Splitting the data into training and testing, with 30% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

# Checking the size of datasets
print(f'Training set size: {X_train.shape[0]}')
print(f'Test set size: {X_test.shape[0]}')


Training set size: 721
Test set size: 309


## Import Keras


In [None]:
import keras

In [None]:
from keras.models import Sequential
from keras.layers import Dense

<a id='item33'></a>


## A. Building a baseline model


In [None]:
def regression_model():
    #define model type
    model = Sequential()

     #define layers
    model.add(Dense(10, activation='relu', input_shape=(predictors.shape[1],)))
    model.add(Dense(1))

    #compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model







# Teaching the neural network and predicting

In [None]:
#build the model
model_a = regression_model()

#empty list for mean squared errors
mses = np.array([])

#fit the model (50 times)
for i in range(1, 50):
    model_a.fit(X_train, y_train, epochs=50, verbose=0)
    y_prediction = model_a.predict(X_test)
    mses = np.append(mses, mean_squared_error(y_test, y_prediction))
#show array of mean_squared_errors
mses



array([218.1608714 , 107.48509295, 106.84833297, 106.08718756,
       104.23761038, 103.98649077, 112.52807038,  98.56027794,
        82.95245835,  76.96680371,  64.18635233,  70.99396657,
        61.34388102,  60.82751516,  57.09078885,  57.422605  ,
        71.22278083,  56.62940733,  52.35529691,  49.37354203,
        51.03074012,  49.75664374,  49.86299757,  55.19689573,
        51.85024038,  50.31717683,  49.07133003,  48.84192648,
        50.24179104,  48.57391857,  53.06508846,  50.564701  ,
        57.31203228,  67.44130202,  59.78210742,  59.89137914,
        83.98722138,  61.939907  ,  49.58956294,  48.47418808,
        50.064008  ,  63.03442806,  49.80992785,  48.28879151,
        64.28759444,  49.47603357,  55.26921689,  48.73608132,
        48.30373697])

# Error Mean and Standard Deviation

In [None]:
a_std = mses.std()
a_mean = mses.mean()
print("Error Mean: " + str(a_mean) + "\n"
     + "Standard Deviation: " + str(a_std))

Error Mean: 67.21061839288004
Standard Deviation: 28.990996240398758


# B. Normalize the data

In [None]:
#normalize predictors
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


## Train and Test split

In [None]:
from sklearn.model_selection import train_test_split

# Splitting the data into training and testing, with 30% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, random_state=42)

# Checking the size of datasets
print(f'Training set size: {X_train.shape[0]}')
print(f'Test set size: {X_test.shape[0]}')

Training set size: 721
Test set size: 309


In [None]:
n_cols = predictors_norm.shape[1] # number of predictors

##  Building the Neural Network

In [None]:
model_b = regression_model()

## Teaching the neural network and predicting

In [None]:
#empty list for mean squared errors
mses = np.array([])

#fit the model (50 times)
for i in range(1, 50):
    model_b.fit(X_train, y_train, epochs=50, verbose=0)
    y_prediction = model_b.predict(X_test)
    mses = np.append(mses, mean_squared_error(y_test, y_prediction)) #add the mean_squared_error on the mses list

#show array of mean_squared_errors
mses



array([117.05549102, 118.06931452, 103.63277216,  80.63291351,
        70.07170415,  73.33767411,  55.531507  ,  53.34457931,
        51.70518327,  50.21933355,  53.64678124,  52.13323968,
        48.64607272,  51.45103917,  48.9705277 ,  56.54374016,
        49.2047999 ,  56.98693135,  49.49121381,  48.48808365,
        48.50269613,  49.20685564,  51.8015686 ,  50.34790138,
        49.81435971,  49.38277369,  50.10125128,  49.69070556,
        50.66432113,  48.68735211,  48.99532008,  53.27816018,
        49.69058714,  49.03530934,  50.16012147,  50.57682894,
        49.07774895,  53.82819783,  53.20130042,  53.52583692,
        50.63834567,  52.87077536,  50.5296709 ,  45.67712296,
        45.46767276,  46.14001116,  47.51992986,  46.27416174,
        49.95863966])

## Error Mean and Standard Deviation

In [None]:
b_std = mses.std()
b_mean = mses.mean()
print("Error Mean: " + str(b_mean) + "\n"
     + "Standard Deviation: " + str(b_std))

Error Mean: 55.792008746021565
Standard Deviation: 16.006480776329933


## Mean of the mean squared errors compared to those of Step A.

In [None]:
print("Mean of Step A: " + str(a_mean) + "\n"
      + "Mean of Step B: " + str(b_mean) + "\n"
      + "Difference: " + str(a_mean - b_mean))

Mean of Step A: 67.21061839288004
Mean of Step B: 55.792008746021565
Difference: 11.41860964685847


**Normalizing the data, in this case, is more accurate**

# C. Increase the number of epochs - 100

In [None]:
model_c = regression_model()

# Training and predicting the model

In [None]:
#empty list for mean squared errors
mses = np.array([])

#fit the model (50 times)
for i in range(1, 50):
    model_c.fit(X_train, y_train, epochs=100, verbose=0)
    y_prediction = model_c.predict(X_test)
    mses = np.append(mses, mean_squared_error(y_test, y_prediction)) #add the mean_squared_error on the mses list

#show array of mean_squared_errors
mses



array([112.40835373,  89.35316475,  82.61908549,  75.51103495,
        70.98438169,  51.7063171 ,  50.27253773,  48.90789538,
        46.77726623,  44.51855661,  45.18721495,  46.58673416,
        46.07208593,  44.51387725,  47.82522717,  44.71057917,
        44.2461137 ,  45.08381249,  47.14735323,  43.94463385,
        46.00945219,  42.29251266,  42.74298791,  49.51364172,
        44.13576026,  44.62782972,  43.10073561,  41.97269493,
        44.10458277,  46.37954498,  41.22289485,  41.53548899,
        41.53409052,  42.19532616,  43.42081493,  41.55829665,
        40.76982733,  42.04405113,  40.75411795,  40.46136589,
        41.75558525,  41.82755865,  41.39691922,  44.49863948,
        42.6600255 ,  46.64207168,  40.47619704,  42.70486397,
        41.07955385])

## Error Mean and Standard Deviation

In [None]:
c_std = mses.std()
c_mean = mses.mean()
print("Error Mean: " + str(c_mean) + "\n"
     + "Standard Deviation: " + str(c_std))

Error Mean: 48.403952190114076
Standard Deviation: 13.801362359897317


## Mean of the mean squared errors compared to those of Step B

In [None]:
print("Error Mean of Step B: " + str(b_mean) + "\n"
      + "Error Mean of Step C: " + str(c_mean) + "\n"
      + "Difference: " + str(b_mean - c_mean))

Error Mean of Step B: 55.792008746021565
Error Mean of Step C: 48.403952190114076
Difference: 7.3880565559074896


**That increasing number of epochs, in this case, is more accurate**

# D. Increase the number of hidden layers (3 Layers)

In [None]:
def regression_model_d():
    # Define model type
    model = Sequential()

    # Define layers
    model.add(Dense(10, activation='relu', input_shape=(predictors_norm.shape[1],)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))

    # Compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model






## Build the model

In [None]:
model_d = regression_model_d()

## Teaching the neural network and predicting

In [None]:
# Empty list for mean squared errors
mses = np.array([])

# Fit the model (50 times)
for i in range(1, 50):
    model_d.fit(X_train, y_train, epochs=100, verbose=0)
    y_prediction = model_d.predict(X_test)
    mses = np.append(mses, mean_squared_error(y_test, y_prediction)) # Add the mean_squared_error to the mses list

# Show array of mean_squared_errors
print(mses)# Create an instance of the model


[101.15530121 106.9143213   86.11713986  70.1416211   50.36663012
  49.50613149  51.27899307  47.30230526  45.15997972  44.99411356
  50.07233148  47.80465419  49.21502759  42.74868028  41.72821734
  41.47658416  41.30664082  41.18244661  42.46891372  51.47935119
  42.12516122  48.4953418   44.38960828  41.41861097  42.58867615
  45.32660872  38.39046107  40.55863409  45.88980622  43.07210274
  38.22808179  37.72769151  39.03409207  37.91656249  37.20868261
  35.73152154  37.57335396  37.73685074  36.38962185  33.83969485
  34.66968299  35.41501318  39.131475    33.90040404  43.42254909
  34.81747842  36.89467092  35.28952494  42.59369741]


## Error Mean and Standard Deviation

In [None]:
d_std = mses.std()
d_mean = mses.mean()
print("Error Mean: " + str(d_mean) + "\n"
     + "Standard Deviation: " + str(d_std))

Error Mean: 45.759082545766525
Standard Deviation: 14.91795284959945


##  Mean of the mean squared errors compared to those of Step C

In [None]:
print("Error Mean of Step C: " + str(c_mean) + "\n"
      + "Error Mean of Step D: " + str(d_mean) + "\n"
      + "Difference: " + str(c_mean - d_mean))

Error Mean of Step C: 48.403952190114076
Error Mean of Step D: 45.759082545766525
Difference: 2.6448696443475512


**That increasing number of hidden layers, in this case, is more accurate**