# Load the provided data

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("concrete_data.csv")
df_y = df["Strength"]
df_X = df.drop(["Strength"], axis=1)

# Part B. Normalize the data

In [3]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.losses import mean_squared_error

In [4]:
# Instantiate a model
model = Sequential()

In [5]:
# Add a single hidden layer of 10 nodes, and a ReLU activation function
model.add(Dense(10, input_dim=8, activation="relu"))
model.add(Dense(1, activation="linear"))  # Output layer for linear regression output

In [6]:
# Compile the model using Adam optimizer and loss mean_squared_error
model.compile(loss=mean_squared_error, optimizer=Adam(learning_rate=0.01))

# Normalize the data before splitting

In [7]:
# For each column, calculate mean and standard deviation and then for each value, subtract mean and divide by standard deviation

In [8]:
for column in df_X.columns:
    # Calculate mean and standard deviation of each column
    mean = df_X[column].mean()
    std = df_X[column].std()
    # Normalize the data
    df_X[column] = df_X[column].apply(lambda x : (x - mean) / std)
    
df_X

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.795140,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.551340
3,0.491187,0.795140,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069
...,...,...,...,...,...,...,...,...
1025,-0.045623,0.487998,0.564271,-0.092126,0.451190,-1.322363,-0.065861,-0.279597
1026,0.392628,-0.856472,0.959602,0.675872,0.702285,-1.993711,0.496651,-0.279597
1027,-1.269472,0.759210,0.850222,0.521336,-0.017520,-1.035561,0.080068,-0.279597
1028,-1.168042,1.307430,-0.846733,-0.279443,0.852942,0.214537,0.191074,-0.279597


# Train using the code in PART A

# 1. Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_split helper function from Scikit-learn.

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.3, random_state=42)

# 2. Train the model on the training data using 50 epochs

In [11]:
history = model.fit(X_train, y_train, epochs=50, verbose=0)

# 3.  Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

In [12]:
model.evaluate(X_test, y_test)



74.65264129638672

In [13]:
# Answer : Acceptable loss on the test set 

# 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

In [14]:
mean_squared_error_list =  [] # List to hold all the mean_squared_erros

for step_number in range(50):
    X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.3)
    history = model.fit(X_train, y_train, epochs=50, verbose=0)
    step_mean_squared_error = model.evaluate(X_test, y_test)
    mean_squared_error_list.append(step_mean_squared_error)



# 5. Report the mean and the standard deviation of the mean squared errors.

In [15]:
mean_of_mse = sum(mean_squared_error_list) / len(mean_squared_error_list)
standard_deviation_of_mse = sum([((x - mean_of_mse) ** 2) for x in mean_squared_error_list]) / len(mean_squared_error_list)

print("Mean of mean squared errors is : ",mean_of_mse)
print("Standard Deviation of mean squared errors is : ", standard_deviation_of_mse)

Mean of mean squared errors is :  42.568669662475585
Standard Deviation of mean squared errors is :  22.050604691646644
