# Problem 3: Feedforward Neural Networks for Regression

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

# ===== Optional : import other libraries here ===== #
from sklearn.metrics import mean_squared_error

# ===== End of Optional : import other libraries here ===== #

## P3(a) Download and load dataset

- Download the concrete compressive strength dataset from the UCI Machine Learning Repository ([link](http://archive.ics.uci.edu/ml/datasets/Concrete+Compressive+Strength)).
- Extract `Concrete_Data.xls` and place it under the `data/` directory.
- Run the code block below to verify the download.

In [2]:
# Load the concrete compressive strength spreadsheet into `df` and print a
# short preview (column names and the first three rows) to confirm it is usable.
df = None
try:
    df = pd.read_excel('./data/Concrete_Data.xls')
    print(f">>> Available features:", list(df.columns))
    print(f">>> Example 3 data points: \n", df.head(3))
except Exception as err:
    # Catch ordinary errors only: a bare `except:` would also intercept
    # KeyboardInterrupt / SystemExit and mislabel them as a dataset problem.
    # Chain the original error so the real cause (missing file, missing Excel
    # reader, ...) stays visible in the traceback.
    raise Warning(f">>> Your dataset is NOT ready for the next step. Fix this first.") from err

>>> Available features: ['Cement (component 1)(kg in a m^3 mixture)', 'Blast Furnace Slag (component 2)(kg in a m^3 mixture)', 'Fly Ash (component 3)(kg in a m^3 mixture)', 'Water  (component 4)(kg in a m^3 mixture)', 'Superplasticizer (component 5)(kg in a m^3 mixture)', 'Coarse Aggregate  (component 6)(kg in a m^3 mixture)', 'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)', 'Concrete compressive strength(MPa, megapascals) ']
>>> Example 3 data points: 
    Cement (component 1)(kg in a m^3 mixture)  \
0                                      540.0   
1                                      540.0   
2                                      332.5   

   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  \
0                                                0.0       
1                                                0.0       
2                                              142.5       

   Fly Ash (component 3)(kg in a m^3 mixture)  \
0                                        

## P3(b) Split the dataset
- Pick the first 730 data points as the training set and the last 300 points as the test set.
- Use `Concrete compressive strength(MPa, megapascals) ` column as label (i.e., y).
- Your code should pass the assertions at the end of the code block. Do not proceed until they pass.

In [3]:
x_train = y_train = x_test = y_test = None

# ===== Split training and test dataset ===== #
# Rows: the first 730 go to training, the last 300 to testing.
# Columns: the final column is the label, every column before it is a feature.
train_rows = df.iloc[:730]
test_rows = df.iloc[-300:]
x_train, y_train = train_rows.iloc[:, :-1], train_rows.iloc[:, -1]
x_test, y_test = test_rows.iloc[:, :-1], test_rows.iloc[:, -1]
# ===== End of Split training and test dataset ===== #

# Convert to numpy array
x_train, x_test = x_train.to_numpy(), x_test.to_numpy()
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

# Sanity checks on the split sizes before moving on.
assert x_train.shape == (730, 8)
assert x_test.shape == (300, 8)
assert y_train.shape == (730,) or y_train.shape[0] == 730
assert y_test.shape == (300,) or y_test.shape[0] == 300
print(f">>> P3(b) passed.")

>>> P3(b) passed.


## P3(c) Implement a neural network 
- Use a single hidden layer with `early_stopping=False`.
- Use a trial-and-error strategy to find the network structure that yields the lowest test error.
- Your code should reflect your multiple trials and then report the optimal configuration.
- If you encounter a warning such as "ConvergenceWarning", consider increasing the `max_iter` parameter of `MLPRegressor`.

In [36]:
# ===== Define parameters for trial and error ===== #
# We will loop through your defined available settings.
# Note that the settings are not limited to below. Feel free to tune other parameters but we won't test below.
# Refer to https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html
hidden_layer_sizes = [(5,), (10,), (20,)] # example: [(1)], note that it must be single layer
activation = ['relu', 'tanh', 'identity', 'logistic'] # example: ['relu'], you need to exhaust this attribute
solver = ['adam'] # example: ['adam'], you need to exhaust this attribute
alpha = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1] # example: [1e-3]
# Initial learning rates to try; each one is passed to MLPRegressor as
# `learning_rate_init` in the loop below.
learning_rates = [0.01, 0.001]
# ===== End of Define parameters for trial and error ===== #

# This is to prevent unaffordable time complexity
# If not passed, try eliminate some choices upon submission. You can comment this out during implementation.
# assert(len(hidden_layer_sizes) * len(activation) * len(solver) * len(alpha) < 500)

# Exhaustive trial-and-error search: fit one network per combination and keep
# the combination with the lowest mean squared error on the test split.
best_test_error = np.inf
best_settings = {
    "hidden_layer_sizes": None,
    "activation": None,
    "solver": None,
    "alpha": None,
    "batch_size": None,
    "learning_rate_init": None
}
# Loop through parameters
for h in hidden_layer_sizes:
    for a in activation:
        for s in solver:
            for al in alpha:
                for lr in learning_rates:
                    # ===== Implement a network with iterated settings ===== #
                    # Note: set validation_fraction to 0.1 or leave as default
                    # random_state is fixed so every trial is reproducible.
                    model = MLPRegressor(
                        hidden_layer_sizes=h,
                        activation=a,
                        solver=s,
                        alpha=al,
                        learning_rate_init=lr,
                        random_state=42,
                        early_stopping=False
                    )
                    # ===== End of Implement a network with iterated settings ===== #

                    # ===== Train network ===== #

                    model.fit(x_train, y_train)

                    # ===== End of Train network ===== #

                    # ===== Test network ===== #

                    y_pred = model.predict(x_test)

                    # ===== End of Test network ===== #

                    # ===== Compute mean squared error ===== #

                    test_error = mean_squared_error(y_test, y_pred)

                    # ===== End of Compute mean squared error ===== #

                    # ===== Is it the best setting ===== #

                    # Strict "<" keeps the first combination found on ties.
                    if test_error < best_test_error:
                        best_test_error = test_error
                        best_settings = {
                            "hidden_layer_sizes": h,
                            "activation": a,
                            "solver": s,
                            "alpha": al,
                            "batch_size": model.batch_size,
                            "learning_rate_init": model.learning_rate_init
                        }

                    # ===== End of Is it the best setting ===== #





In [37]:
# Show the winning configuration and its test MSE, one per line.
print(
    f">>> best_test_error={best_test_error}",
    f">>> best_settings={best_settings}",
    sep="\n",
)

>>> best_test_error=57.652967449633394
>>> best_settings={'hidden_layer_sizes': (20,), 'activation': 'relu', 'solver': 'adam', 'alpha': 0.0001, 'batch_size': 'auto', 'learning_rate_init': 0.001}


# Extra Credit
- Set `early_stopping=True`.
- Tune the validation rate (`validation_fraction`).
- Your performance must beat part 3 to receive credit.

In [44]:
# ===== Define parameters for trial and error ===== #
# Candidate values for each hyperparameter; every combination is tried.
hidden_layer_sizes = [(5,), (10,), (20,)]  # one hidden layer, varying width
activation = ['relu', 'tanh', 'identity', 'logistic']
solver = ['adam']
alpha = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
validation_rates = [0.1, 0.2, 0.3]  # passed as validation_fraction
# ===== End of Define parameters for trial and error ===== #

# This is to prevent unaffordable time complexity
# If not passed, try eliminate some choices upon submission. You can comment this out during implementation.
# assert(len(hidden_layer_sizes) * len(activation) * len(solver) * len(alpha) * len(validation_rates) < 1000)

best_test_error = np.inf
# Every setting starts out as None until the first trial finishes.
best_settings = dict.fromkeys([
    "hidden_layer_sizes",
    "activation",
    "solver",
    "alpha",
    "batch_size",
    "learning_rate_init",
    "validation_rates",
])

# Enumerate all combinations up front. The order matches nested loops with
# hidden layer size outermost and validation fraction innermost.
search_grid = [
    (h, a, s, al, v)
    for h in hidden_layer_sizes
    for a in activation
    for s in solver
    for al in alpha
    for v in validation_rates
]

for h, a, s, al, v in search_grid:
    # ===== Implement a network with iterated settings ===== #
    model = MLPRegressor(
        early_stopping=True,
        validation_fraction=v,
        hidden_layer_sizes=h,
        activation=a,
        solver=s,
        alpha=al,
        random_state=42,
    )
    # ===== End of Implement a network with iterated settings ===== #

    # ===== Train and test network ===== #
    y_pred = model.fit(x_train, y_train).predict(x_test)
    # ===== End of Train and test network ===== #

    # ===== Compute mean squared error ===== #
    test_error = mean_squared_error(y_test, y_pred)
    # ===== End of Compute mean squared error ===== #

    # ===== Is it the best setting ===== #
    # Only a strictly lower error replaces the current best.
    if test_error < best_test_error:
        best_test_error = test_error
        best_settings = dict(
            hidden_layer_sizes=h,
            activation=a,
            solver=s,
            alpha=al,
            batch_size=model.batch_size,
            learning_rate_init=model.learning_rate_init,
            validation_rates=v,
        )
    # ===== End of Is it the best setting ===== #











In [46]:
# Show the winning early-stopping configuration and its test MSE, one per line.
print(
    f">>> best_test_error={best_test_error}",
    f">>> best_settings={best_settings}",
    sep="\n",
)

>>> best_test_error=49.19071889099196
>>> best_settings={'hidden_layer_sizes': (20,), 'activation': 'relu', 'solver': 'adam', 'alpha': 0.0001, 'batch_size': 'auto', 'learning_rate_init': 0.001, 'validation_rates': 0.1}
