This dataset is part of the [Farming Systems Project](https://www.ars.usda.gov/northeast-area/beltsville-md-barc/beltsville-agricultural-research-center/sustainable-agricultural-systems-laboratory/docs/farming-systems-project/) at USDA, Beltsville, MD. This data is not available online on the USDA website but can be found on my [GitHub](https://github.com/mmtokay/DATA606/tree/master/dataset). The weather data includes the following fields:

* Julian Day 
* Month
* Day
* Date
* avgtTempC - average temperature in C
* maxTempC - maximum temperature in C
* minTempC - minimum temperature in C
* maxHumPct - maximum humidity in %
* minHumPct - minimum humidity in %
* avgRadWm-2 - average radiation in W/m²
* meanWindMs-1 - mean wind in m/s
* PrecipitationMm - precipitation/snow melt in mm

# Model A

In [None]:
import io
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import warnings
import time
import pathlib
import seaborn as sns
import tensorflow as tf
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

from datetime import datetime, timedelta
from sklearn import linear_model
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression, RidgeClassifier
from sklearn.metrics import *
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, cross_val_predict, cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, RobustScaler, Normalizer, MinMaxScaler, StandardScaler, Binarizer
from sklearn.tree import DecisionTreeRegressor, export_graphviz
from sklearn.utils import shuffle
from time import time
from tensorflow import keras
from tensorflow.keras import layers

# Show which TensorFlow version this notebook is running against.
print(tf.__version__)
# Suppress FutureWarning messages so they do not clutter the cell output.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
def modelEvaluation(test_y, y_pred):
    """Print regression metrics and draw a predicted-vs-true scatter plot.

    Args:
        test_y: True GrainYield values of the test set.
        y_pred: Model predictions for the same rows, in the same order.

    Prints the mean absolute error, mean squared error, median absolute
    error and coefficient of determination (R^2), then scatters the
    predictions against the true values on equal-aspect axes limited to
    [0, 6000] with the identity line for reference.
    """
    # Mean absolute error regression loss (Best is 0)
    mae = mean_absolute_error(test_y, y_pred)
    print("Mean absolute error regression loss (Best is 0) = {:.5f}".format(mae))

    # Mean squared error (Best is 0)
    mse = mean_squared_error(test_y, y_pred)
    print("Mean squared error (Best is 0) = {:.5f}".format(mse))

    # Median absolute error regression loss (Best is 0). This is an absolute
    # error in the units of the target, not a percentage error, so the label
    # no longer mentions "mean absolute percentage error".
    medae = median_absolute_error(test_y, y_pred)
    print("Median absolute error regression loss (Best is 0) = {:.5f}".format(medae))

    # Coefficient of determination (Best is 1). r2_score is not symmetric:
    # the true values must be the first argument and the predictions the second.
    r2 = r2_score(test_y, y_pred)
    print("Coefficient of determination (Best is 1) = {:.5f}".format(r2))

    # Predicted-vs-true scatter; points on the diagonal are perfect predictions.
    plt.axes(aspect='equal')
    plt.scatter(test_y, y_pred)
    plt.xlabel('True Values [GrainYield]')
    plt.ylabel('Predictions [GrainYield]')
    lims = [0, 6000]
    plt.xlim(lims)
    plt.ylim(lims)
    _ = plt.plot(lims, lims)

def errorPlot(test_y, y_pred):
    """Draw a 25-bin histogram of the prediction errors (prediction minus truth)."""
    residuals = y_pred - test_y
    plt.hist(residuals, bins=25)
    plt.xlabel("Prediction Error [GrainYield]")
    plt.ylabel("Count")

# Wheat - 31 weeks

In [None]:
# Load the 31-week wheat feature table and display its pairwise correlation matrix.
dataWheat31w = pd.read_csv('./dataset/wheatFeatures31w.csv')
corrWheat31w = dataWheat31w.corr()
# Styler.set_precision is deprecated (pandas 1.3) and removed in pandas 2.0; an
# explicit two-decimal format string renders the same on old and new versions.
corrWheat31w.style.background_gradient(cmap='coolwarm').format('{:.2f}')

In [None]:
# Hold out a test set. Predictors and the GrainYield target are split in one
# call with the same seed, so the rows of X and y stay aligned.
train_31w_X, test_31w_X, train_31w_y, test_31w_y = train_test_split(
    dataWheat31w.drop('GrainYield', axis=1),
    dataWheat31w['GrainYield'],
    random_state=1,
)

# Fit the RobustScaler on the training predictors only, then apply the same
# fitted transform to both the training and the test predictors.
scaler = RobustScaler().fit(train_31w_X)
train_scaler_31w_X = scaler.transform(train_31w_X)
test_scaler_31w_X = scaler.transform(test_31w_X)

**Lasso**

In [None]:
# Lasso regression on the scaled predictors, scored on the held-out test set.
lm = linear_model.Lasso(alpha=0.2).fit(train_scaler_31w_X, train_31w_y)
y_pred_31w_lm = lm.predict(test_scaler_31w_X)
modelEvaluation(test_31w_y, y_pred_31w_lm)

# Label every coefficient with its feature name and sort in descending order.
important_features = pd.Series(
    data=lm.coef_,
    index=dataWheat31w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))  # five largest coefficients
print(important_features.tail(5))  # five smallest coefficients

In [None]:
# Histogram of the Lasso prediction errors on the 31-week test set.
errorPlot(test_31w_y, y_pred_31w_lm)

**Decision Tree Regressor**

In [None]:
# Decision tree regressor with default settings, scored on the held-out test set.
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_31w_X, train_31w_y)
y_pred_31w_dtr = tree_model.predict(test_scaler_31w_X)
modelEvaluation(test_31w_y, y_pred_31w_dtr)

# The result must be bound to `important_features`: the original misspelled
# name ('mportant_features') left the ranking from the previous cell in place,
# so the tree's importances were never sorted or printed.
important_features = pd.Series(data=tree_model.feature_importances_, index=dataWheat31w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False, inplace=True)
print(important_features[:5])   # five most important features
print(important_features[-5:])  # five least important features

In [None]:
# Histogram of the decision-tree prediction errors on the 31-week test set.
errorPlot(test_31w_y, y_pred_31w_dtr)

**Random Forest Regressor**

In [None]:
# Random forest with default hyper-parameters, scored on the held-out test set.
rf_model = RandomForestRegressor().fit(train_scaler_31w_X, train_31w_y)
y_pred_31w_rfr = rf_model.predict(test_scaler_31w_X)
modelEvaluation(test_31w_y, y_pred_31w_rfr)

# Rank the features by importance and show the top five.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataWheat31w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))

In [None]:
# Histogram of the default random-forest prediction errors on the 31-week test set.
errorPlot(test_31w_y, y_pred_31w_rfr)

In [None]:
# Random forest with an explicit tree count, depth limit and fixed seed.
max_depth = 25
n_est = 500
rf_model = RandomForestRegressor(
    n_estimators=n_est,
    max_depth=max_depth,
    random_state=0,
).fit(train_scaler_31w_X, train_31w_y)
y_pred_31w_rfrc = rf_model.predict(test_scaler_31w_X)
modelEvaluation(test_31w_y, y_pred_31w_rfrc)

# Rank the features by importance and show the top five.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataWheat31w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))

In [None]:
# Histogram of the configured random-forest prediction errors on the 31-week test set.
errorPlot(test_31w_y, y_pred_31w_rfrc)

**Neural Network**

***Build the model***


Let's build our model. Here, we'll use a Sequential model with six densely connected hidden layers (with a Flatten layer between the third and the fourth) and an output layer that returns a single, continuous value. The model-building steps are wrapped in a function, build_model, since we'll create a second model later on.

In [None]:
def build_model():
    """Return a compiled Keras regression network for the 31-week features.

    The input width is the number of columns of ``train_31w_X``. The stack is
    Dense 128-64-32, a Flatten layer, Dense 64-32-16 (all ReLU) and a single
    output unit without activation. The model is compiled with MSE loss,
    ``RMSprop(0.001)`` and the MAE and MSE metrics.
    """
    n_features = len(train_31w_X.keys())
    stack = [
        layers.Dense(128, activation='relu', input_shape=[n_features]),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(16, activation='relu'),
        layers.Dense(1),
    ]
    model = keras.Sequential(stack)
    model.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=['mae', 'mse'],
    )
    return model

In [None]:
# Create a fresh, untrained network for the 31-week features.
model = build_model()

***Inspect the model***

Use the .summary method to print a simple description of the model

In [None]:
# Print a description of the network that was just built.
model.summary()

***Train the model***

Train the model for 200 epochs, and record the training and validation loss and metrics (MAE and MSE) in the history object.

In [None]:
# Train for a fixed number of epochs; the last 20% of the training rows is
# passed as validation_split. Keras' own logging is off (verbose=0) and the
# tensorflow_docs EpochDots callback is attached instead.
EPOCHS = 200
history = model.fit(
    train_scaler_31w_X,
    train_31w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)

In [None]:
# tensorflow_docs helper used below to plot the recorded training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# MAE recorded in `history` for the 31-week run.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [GrainYield]')

In [None]:
# MSE recorded in `history` for the 31-week run.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [GrainYield^2]')

In [None]:
# Start again from an untrained network so the earlier run does not carry over.
model = build_model()

# Monitor the validation loss with a patience of 10 epochs; training may end
# before EPOCHS is reached.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
early_history = model.fit(
    train_scaler_31w_X,
    train_31w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()],
)

Let's see how well the model generalizes by using the test set, which we did not use when training the model. This tells us how well we can expect the model to predict when we use it in the real world.

In [None]:
# Evaluate on the held-out test set; the three values are the loss followed by
# the two metrics the model was compiled with (mae, mse).
loss, mae, mse = model.evaluate(test_scaler_31w_X, test_31w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

***Make predictions***

Finally, predict GrainYield values using data in the testing set:

In [None]:
# Predict on the test set; flatten() collapses the model output to a 1-D array.
y_pred_31w_nn = model.predict(test_scaler_31w_X).flatten()

It looks like our model predicts reasonably well. Let's take a look at the error distribution.

In [None]:
# Metrics and predicted-vs-true scatter for the neural network (31 weeks).
modelEvaluation(test_31w_y, y_pred_31w_nn)

In [None]:
# Histogram of the neural-network prediction errors on the 31-week test set.
errorPlot(test_31w_y, y_pred_31w_nn)

# Wheat - 30 weeks

In [None]:
# Load the 30-week wheat feature table and display its pairwise correlation matrix.
dataWheat30w = pd.read_csv('./dataset/wheatFeatures30w.csv')
corrWheat30w = dataWheat30w.corr()
# Styler.set_precision is deprecated (pandas 1.3) and removed in pandas 2.0; an
# explicit two-decimal format string renders the same on old and new versions.
corrWheat30w.style.background_gradient(cmap='coolwarm').format('{:.2f}')

In [None]:
# Hold out a test set. Predictors and the GrainYield target are split in one
# call with the same seed, so the rows of X and y stay aligned.
train_30w_X, test_30w_X, train_30w_y, test_30w_y = train_test_split(
    dataWheat30w.drop('GrainYield', axis=1),
    dataWheat30w['GrainYield'],
    random_state=1,
)

# Fit the RobustScaler on the training predictors only, then apply the same
# fitted transform to both the training and the test predictors.
scaler = RobustScaler().fit(train_30w_X)
train_scaler_30w_X = scaler.transform(train_30w_X)
test_scaler_30w_X = scaler.transform(test_30w_X)

**Lasso**

In [None]:
# Lasso regression on the scaled predictors, scored on the held-out test set.
# alpha is the regularisation strength and must be non-negative; the original
# -0.05 was a sign typo (the 29- and 28-week cells use 0.05).
lm = linear_model.Lasso(alpha=0.05)
lm.fit(train_scaler_30w_X, train_30w_y)
y_pred_30w_lm = lm.predict(test_scaler_30w_X)
modelEvaluation(test_30w_y, y_pred_30w_lm)

# Label every coefficient with its feature name and sort in descending order.
important_features = pd.Series(data=lm.coef_, index=dataWheat30w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False, inplace=True)
print(important_features[:5])   # five largest coefficients
print(important_features[-5:])  # five smallest coefficients

In [None]:
# Histogram of the Lasso prediction errors on the 30-week test set.
errorPlot(test_30w_y, y_pred_30w_lm)

**Decision Tree Regressor**

In [None]:
# Decision tree regressor with default settings, scored on the held-out test set.
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_30w_X, train_30w_y)
y_pred_30w_dtr = tree_model.predict(test_scaler_30w_X)
modelEvaluation(test_30w_y, y_pred_30w_dtr)

# The result must be bound to `important_features`: the original misspelled
# name ('mportant_features') left the ranking from the previous cell in place,
# so the tree's importances were never sorted or printed.
important_features = pd.Series(data=tree_model.feature_importances_, index=dataWheat30w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False, inplace=True)
print(important_features[:5])   # five most important features
print(important_features[-5:])  # five least important features

In [None]:
# Histogram of the decision-tree prediction errors on the 30-week test set.
errorPlot(test_30w_y, y_pred_30w_dtr)

**Random Forest Regressor**

In [None]:
# Random forest with default hyper-parameters, scored on the held-out test set.
rf_model = RandomForestRegressor().fit(train_scaler_30w_X, train_30w_y)
y_pred_30w_rfr = rf_model.predict(test_scaler_30w_X)
modelEvaluation(test_30w_y, y_pred_30w_rfr)

# Rank the features by importance and show the top five.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataWheat30w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))

In [None]:
# Histogram of the default random-forest prediction errors on the 30-week test set.
errorPlot(test_30w_y, y_pred_30w_rfr)

In [None]:
# Random forest with an explicit tree count, depth limit and fixed seed.
max_depth = 20
n_est = 700
rf_model = RandomForestRegressor(
    n_estimators=n_est,
    max_depth=max_depth,
    random_state=0,
).fit(train_scaler_30w_X, train_30w_y)
y_pred_30w_rfrc = rf_model.predict(test_scaler_30w_X)
modelEvaluation(test_30w_y, y_pred_30w_rfrc)

# Rank the features by importance and show the top five.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataWheat30w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))

In [None]:
# Histogram of the configured random-forest prediction errors on the 30-week test set.
errorPlot(test_30w_y, y_pred_30w_rfrc)

**Neural Network**

***Build the model***

Let's build our model. Here, we'll use a Sequential model with six densely connected hidden layers (with a Flatten layer between the third and the fourth) and an output layer that returns a single, continuous value. The model-building steps are wrapped in a function, build_model, since we'll create a second model later on.

In [None]:
def build_model():
    """Return a compiled Keras regression network for the 30-week features.

    The input width is the number of columns of ``train_30w_X``. The stack is
    Dense 128-64-32, a Flatten layer, Dense 64-32-16 (all ReLU) and a single
    output unit without activation. The model is compiled with MSE loss,
    ``RMSprop(0.001)`` and the MAE and MSE metrics.
    """
    n_features = len(train_30w_X.keys())
    stack = [
        layers.Dense(128, activation='relu', input_shape=[n_features]),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(16, activation='relu'),
        layers.Dense(1),
    ]
    model = keras.Sequential(stack)
    model.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=['mae', 'mse'],
    )
    return model

In [None]:
# Create a fresh, untrained network for the 30-week features.
model = build_model()

***Inspect the model***

Use the .summary method to print a simple description of the model

In [None]:
# Print a description of the network that was just built.
model.summary()

***Train the model***

Train the model for 200 epochs, and record the training and validation loss and metrics (MAE and MSE) in the history object.

In [None]:
# Train for a fixed number of epochs; the last 20% of the training rows is
# passed as validation_split. Keras' own logging is off (verbose=0) and the
# tensorflow_docs EpochDots callback is attached instead.
EPOCHS = 200
history = model.fit(
    train_scaler_30w_X,
    train_30w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)

In [None]:
# tensorflow_docs helper used below to plot the recorded training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# MAE recorded in `history` for the 30-week run.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [GrainYield]')

In [None]:
# MSE recorded in `history` for the 30-week run.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [GrainYield^2]')

In [None]:
# Start again from an untrained network so the earlier run does not carry over.
model = build_model()

# Monitor the validation loss with a patience of 10 epochs; training may end
# before EPOCHS is reached.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
early_history = model.fit(
    train_scaler_30w_X,
    train_30w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()],
)

In [None]:
# Evaluate on the held-out test set; the three values are the loss followed by
# the two metrics the model was compiled with (mae, mse).
loss, mae, mse = model.evaluate(test_scaler_30w_X, test_30w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

In [None]:
# Predict on the test set (flatten() collapses the output to a 1-D array),
# then print the metrics and draw the predicted-vs-true scatter.
y_pred_30w_nn = model.predict(test_scaler_30w_X).flatten()
modelEvaluation(test_30w_y, y_pred_30w_nn)

In [None]:
# Histogram of the neural-network prediction errors on the 30-week test set.
errorPlot(test_30w_y, y_pred_30w_nn)

# Wheat - 29 weeks

In [None]:
# Load the 29-week wheat feature table and display its pairwise correlation matrix.
dataWheat29w = pd.read_csv('./dataset/wheatFeatures29w.csv')
corrWheat29w = dataWheat29w.corr()
# Styler.set_precision is deprecated (pandas 1.3) and removed in pandas 2.0; an
# explicit two-decimal format string renders the same on old and new versions.
corrWheat29w.style.background_gradient(cmap='coolwarm').format('{:.2f}')

In [None]:
# Hold out a test set. Predictors and the GrainYield target are split in one
# call with the same seed, so the rows of X and y stay aligned.
train_29w_X, test_29w_X, train_29w_y, test_29w_y = train_test_split(
    dataWheat29w.drop('GrainYield', axis=1),
    dataWheat29w['GrainYield'],
    random_state=1,
)

# Fit the RobustScaler on the training predictors only, then apply the same
# fitted transform to both the training and the test predictors.
scaler = RobustScaler().fit(train_29w_X)
train_scaler_29w_X = scaler.transform(train_29w_X)
test_scaler_29w_X = scaler.transform(test_29w_X)

**Lasso**

In [None]:
# Lasso regression on the scaled predictors, scored on the held-out test set.
lm = linear_model.Lasso(alpha=0.05).fit(train_scaler_29w_X, train_29w_y)
y_pred_29w_lm = lm.predict(test_scaler_29w_X)
modelEvaluation(test_29w_y, y_pred_29w_lm)

# Label every coefficient with its feature name and sort in descending order.
important_features = pd.Series(
    data=lm.coef_,
    index=dataWheat29w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))  # five largest coefficients
print(important_features.tail(5))  # five smallest coefficients

In [None]:
# Histogram of the Lasso prediction errors on the 29-week test set.
errorPlot(test_29w_y, y_pred_29w_lm)

**Decision Tree Regressor**

In [None]:
# Decision tree regressor with default settings, scored on the held-out test set.
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_29w_X, train_29w_y)
y_pred_29w_dtr = tree_model.predict(test_scaler_29w_X)
modelEvaluation(test_29w_y, y_pred_29w_dtr)

# The result must be bound to `important_features`: the original misspelled
# name ('mportant_features') left the ranking from the previous cell in place,
# so the tree's importances were never sorted or printed.
important_features = pd.Series(data=tree_model.feature_importances_, index=dataWheat29w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False, inplace=True)
print(important_features[:5])   # five most important features
print(important_features[-5:])  # five least important features

In [None]:
# Histogram of the decision-tree prediction errors on the 29-week test set.
errorPlot(test_29w_y, y_pred_29w_dtr)

**Random Forest Regressor**

In [None]:
# Random forest with default hyper-parameters, scored on the held-out test set.
rf_model = RandomForestRegressor().fit(train_scaler_29w_X, train_29w_y)
y_pred_29w_rfr = rf_model.predict(test_scaler_29w_X)
modelEvaluation(test_29w_y, y_pred_29w_rfr)

# Rank the features by importance and show the top five.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataWheat29w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))

In [None]:
# Histogram of the default random-forest prediction errors on the 29-week test set.
errorPlot(test_29w_y, y_pred_29w_rfr)

In [None]:
# Random forest with an explicit tree count, depth limit and fixed seed.
max_depth = 25
n_est = 600
rf_model = RandomForestRegressor(
    n_estimators=n_est,
    max_depth=max_depth,
    random_state=0,
).fit(train_scaler_29w_X, train_29w_y)
y_pred_29w_rfrc = rf_model.predict(test_scaler_29w_X)
modelEvaluation(test_29w_y, y_pred_29w_rfrc)

# Rank the features by importance and show the top five.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataWheat29w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))

In [None]:
# Histogram of the configured random-forest prediction errors on the 29-week test set.
errorPlot(test_29w_y, y_pred_29w_rfrc)

**Neural Network**

***Build the model***


Let's build our model. Here, we'll use a Sequential model with five densely connected hidden layers (with a Flatten layer between the third and the fourth) and an output layer that returns a single, continuous value. The model-building steps are wrapped in a function, build_model, since we'll create a second model later on.

In [None]:
def build_model():
    """Return a compiled Keras regression network for the 29-week features.

    The input width is the number of columns of ``train_29w_X``. The stack is
    Dense 128-64-32, a Flatten layer, Dense 64-32 (all ReLU) and a single
    output unit without activation. The model is compiled with MSE loss,
    ``RMSprop(0.001)`` and the MAE and MSE metrics.
    """
    n_features = len(train_29w_X.keys())
    stack = [
        layers.Dense(128, activation='relu', input_shape=[n_features]),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1),
    ]
    model = keras.Sequential(stack)
    model.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=['mae', 'mse'],
    )
    return model

In [None]:
# Create a fresh, untrained network for the 29-week features.
model = build_model()

***Inspect the model***

Use the .summary method to print a simple description of the model

In [None]:
# Print a description of the network that was just built.
model.summary()

***Train the model***

Train the model for 200 epochs, and record the training and validation loss and metrics (MAE and MSE) in the history object.

In [None]:
# Train for a fixed number of epochs; the last 20% of the training rows is
# passed as validation_split. Keras' own logging is off (verbose=0) and the
# tensorflow_docs EpochDots callback is attached instead.
EPOCHS = 200
history = model.fit(
    train_scaler_29w_X,
    train_29w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)

In [None]:
# tensorflow_docs helper used below to plot the recorded training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# MAE recorded in `history` for the 29-week run.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [GrainYield]')

In [None]:
# MSE recorded in `history` for the 29-week run.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [GrainYield^2]')

In [None]:
# Start again from an untrained network so the earlier run does not carry over.
model = build_model()

# Monitor the validation loss with a patience of 30 epochs; training may end
# before EPOCHS is reached.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)
early_history = model.fit(
    train_scaler_29w_X,
    train_29w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()],
)

In [None]:
# Evaluate on the held-out test set; the three values are the loss followed by
# the two metrics the model was compiled with (mae, mse).
loss, mae, mse = model.evaluate(test_scaler_29w_X, test_29w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

In [None]:
# Predict on the test set (flatten() collapses the output to a 1-D array),
# then print the metrics and draw the predicted-vs-true scatter.
y_pred_29w_nn = model.predict(test_scaler_29w_X).flatten()
modelEvaluation(test_29w_y, y_pred_29w_nn)

In [None]:
# Histogram of the neural-network prediction errors on the 29-week test set.
errorPlot(test_29w_y, y_pred_29w_nn)

# Wheat - 28 weeks

In [None]:
# Load the 28-week wheat feature table and display its pairwise correlation matrix.
dataWheat28w = pd.read_csv('./dataset/wheatFeatures28w.csv')
corrWheat28w = dataWheat28w.corr()
# Styler.set_precision is deprecated (pandas 1.3) and removed in pandas 2.0; an
# explicit two-decimal format string renders the same on old and new versions.
corrWheat28w.style.background_gradient(cmap='coolwarm').format('{:.2f}')

In [None]:
# Hold out a test set. Predictors and the GrainYield target are split in one
# call with the same seed, so the rows of X and y stay aligned.
train_28w_X, test_28w_X, train_28w_y, test_28w_y = train_test_split(
    dataWheat28w.drop('GrainYield', axis=1),
    dataWheat28w['GrainYield'],
    random_state=1,
)

# Fit the RobustScaler on the training predictors only, then apply the same
# fitted transform to both the training and the test predictors.
scaler = RobustScaler().fit(train_28w_X)
train_scaler_28w_X = scaler.transform(train_28w_X)
test_scaler_28w_X = scaler.transform(test_28w_X)

**Lasso**

In [None]:
# Lasso regression on the scaled predictors, scored on the held-out test set.
lm = linear_model.Lasso(alpha=0.05).fit(train_scaler_28w_X, train_28w_y)
y_pred_28w_lm = lm.predict(test_scaler_28w_X)
modelEvaluation(test_28w_y, y_pred_28w_lm)

# Label every coefficient with its feature name and sort in descending order.
important_features = pd.Series(
    data=lm.coef_,
    index=dataWheat28w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))  # five largest coefficients
print(important_features.tail(5))  # five smallest coefficients

In [None]:
# Histogram of the Lasso prediction errors on the 28-week test set.
errorPlot(test_28w_y, y_pred_28w_lm)

**Decision Tree Regressor**

In [None]:
# Decision tree regressor with default settings, scored on the held-out test set.
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_28w_X, train_28w_y)
y_pred_28w_dtr = tree_model.predict(test_scaler_28w_X)
modelEvaluation(test_28w_y, y_pred_28w_dtr)

# The result must be bound to `important_features`: the original misspelled
# name ('mportant_features') left the ranking from the previous cell in place,
# so the tree's importances were never sorted or printed.
important_features = pd.Series(data=tree_model.feature_importances_, index=dataWheat28w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False, inplace=True)
print(important_features[:5])   # five most important features
print(important_features[-5:])  # five least important features

In [None]:
# Histogram of the decision-tree prediction errors on the 28-week test set.
errorPlot(test_28w_y, y_pred_28w_dtr)

**Random Forest Regressor**

In [None]:
# Random forest with default hyper-parameters, scored on the held-out test set.
rf_model = RandomForestRegressor().fit(train_scaler_28w_X, train_28w_y)
y_pred_28w_rfr = rf_model.predict(test_scaler_28w_X)
modelEvaluation(test_28w_y, y_pred_28w_rfr)

# Rank the features by importance and show the top five.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataWheat28w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))

In [None]:
# Histogram of the default random-forest prediction errors on the 28-week test set.
errorPlot(test_28w_y, y_pred_28w_rfr)

In [None]:
# Random forest with an explicit tree count, depth limit and fixed seed.
max_depth = 25
n_est = 600
rf_model = RandomForestRegressor(
    n_estimators=n_est,
    max_depth=max_depth,
    random_state=0,
).fit(train_scaler_28w_X, train_28w_y)
y_pred_28w_rfrc = rf_model.predict(test_scaler_28w_X)
modelEvaluation(test_28w_y, y_pred_28w_rfrc)

# Rank the features by importance and show the top five.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataWheat28w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))

In [None]:
# Histogram of the configured random-forest prediction errors on the 28-week test set.
errorPlot(test_28w_y, y_pred_28w_rfrc)

**Neural Network**

***Build the model***


Let's build our model. Here, we'll use a Sequential model with five densely connected hidden layers (with a Flatten layer between the third and the fourth) and an output layer that returns a single, continuous value. The model-building steps are wrapped in a function, build_model, since we'll create a second model later on.

In [None]:
def build_model():
    """Return a compiled Keras regression network for the 28-week features.

    The input width is the number of columns of ``train_28w_X``. The stack is
    Dense 128-64-32, a Flatten layer, Dense 64-32 (all ReLU) and a single
    output unit without activation. The model is compiled with MSE loss,
    ``RMSprop(0.001)`` and the MAE and MSE metrics.
    """
    n_features = len(train_28w_X.keys())
    stack = [
        layers.Dense(128, activation='relu', input_shape=[n_features]),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1),
    ]
    model = keras.Sequential(stack)
    model.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=['mae', 'mse'],
    )
    return model

In [None]:
# Create a fresh, untrained network for the 28-week features.
model = build_model()

***Inspect the model***

Use the .summary method to print a simple description of the model

In [None]:
# Print a description of the network that was just built.
model.summary()

***Train the model***

Train the model for 200 epochs, and record the training and validation loss and metrics (MAE and MSE) in the history object.

In [None]:
# Train for a fixed number of epochs; the last 20% of the training rows is
# passed as validation_split. Keras' own logging is off (verbose=0) and the
# tensorflow_docs EpochDots callback is attached instead.
EPOCHS = 200
history = model.fit(
    train_scaler_28w_X,
    train_28w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)

In [None]:
# tensorflow_docs helper used below to plot the recorded training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# MAE recorded in `history` for the 28-week run.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [GrainYield]')

In [None]:
# MSE recorded in `history` for the 28-week run.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [GrainYield^2]')

In [None]:
# Start again from an untrained network so the earlier run does not carry over.
model = build_model()

# Monitor the validation loss with a patience of 30 epochs; training may end
# before EPOCHS is reached.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)
early_history = model.fit(
    train_scaler_28w_X,
    train_28w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()],
)

In [None]:
# Evaluate on the held-out test set; the three values are the loss followed by
# the two metrics the model was compiled with (mae, mse).
loss, mae, mse = model.evaluate(test_scaler_28w_X, test_28w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

In [None]:
# Predict on the test set (flatten() collapses the output to a 1-D array),
# then print the metrics and draw the predicted-vs-true scatter.
y_pred_28w_nn = model.predict(test_scaler_28w_X).flatten()
modelEvaluation(test_28w_y, y_pred_28w_nn)

In [None]:
# Histogram of the neural-network prediction errors on the 28-week test set.
errorPlot(test_28w_y, y_pred_28w_nn)

In [None]:
# Median absolute error on the test set: one row per model, one column per
# dataset (28, 29, 30 and 31 weeks).
truth_by_week = [test_28w_y, test_29w_y, test_30w_y, test_31w_y]
preds_by_model = [
    [y_pred_28w_lm, y_pred_29w_lm, y_pred_30w_lm, y_pred_31w_lm],
    [y_pred_28w_dtr, y_pred_29w_dtr, y_pred_30w_dtr, y_pred_31w_dtr],
    [y_pred_28w_rfr, y_pred_29w_rfr, y_pred_30w_rfr, y_pred_31w_rfr],
    [y_pred_28w_rfrc, y_pred_29w_rfrc, y_pred_30w_rfrc, y_pred_31w_rfrc],
    [y_pred_28w_nn, y_pred_29w_nn, y_pred_30w_nn, y_pred_31w_nn],
]
data = np.array([
    [median_absolute_error(truth, pred) for truth, pred in zip(truth_by_week, preds)]
    for preds in preds_by_model
])
length = len(data)
x_labels = ['Lasso', 'Decision Tree', 'Random Forest', 'Random Forest C', 'Neural Network']

# Set plot parameters
fig, ax = plt.subplots()
fig.set_size_inches(7,5)

width = 0.2 # width of bar
x = np.arange(length)

# One bar series per dataset, each shifted right by one bar width.
series_style = [
    ('#000080', '28 weeks'),
    ('#0F52BA', '29 weeks'),
    ('#6593F5', '30 weeks'),
    ('#93b4fa', '31 weeks'),
]
for col, (color, label) in enumerate(series_style):
    ax.bar(x + col * width, data[:, col], width, color=color, label=label)

# The y-axis is deliberately zoomed; bars outside this range are clipped.
ax.set_ylim(300,455)
# Centre each tick under its group of four bars.
ax.set_xticks(x + width + width/2)
ax.set_xticklabels(x_labels)
ax.set_ylabel('Grain Yield (kg/ha)')
ax.set_xlabel('Regression Model')
# The plotted metric is the median absolute error, not a percentage error.
ax.set_title('Median Absolute Error')
ax.legend()
plt.grid(True, which='major', axis='y', ls='--', lw=.5, c='k', alpha=.3)

fig.tight_layout()
plt.show()

In [None]:
# Coefficient of determination on the test set: one row per model, one column
# per dataset (28, 29, 30 and 31 weeks).
truth_by_week = [test_28w_y, test_29w_y, test_30w_y, test_31w_y]
preds_by_model = [
    [y_pred_28w_lm, y_pred_29w_lm, y_pred_30w_lm, y_pred_31w_lm],
    [y_pred_28w_dtr, y_pred_29w_dtr, y_pred_30w_dtr, y_pred_31w_dtr],
    [y_pred_28w_rfr, y_pred_29w_rfr, y_pred_30w_rfr, y_pred_31w_rfr],
    [y_pred_28w_rfrc, y_pred_29w_rfrc, y_pred_30w_rfrc, y_pred_31w_rfrc],
    [y_pred_28w_nn, y_pred_29w_nn, y_pred_30w_nn, y_pred_31w_nn],
]
# r2_score is not symmetric: the true values go first, the predictions second.
# The original calls passed them the other way round.
data = np.array([
    [r2_score(truth, pred) for truth, pred in zip(truth_by_week, preds)]
    for preds in preds_by_model
])
length = len(data)
x_labels = ['Lasso', 'Decision Tree', 'Random Forest', 'Random Forest C', 'Neural Network']

# Set plot parameters
fig, ax = plt.subplots()
fig.set_size_inches(7,5)
width = 0.2 # width of bar
x = np.arange(length)

# One bar series per dataset, each shifted right by one bar width.
series_style = [
    ('#000080', '28 weeks'),
    ('#0F52BA', '29 weeks'),
    ('#6593F5', '30 weeks'),
    ('#93b4fa', '31 weeks'),
]
for col, (color, label) in enumerate(series_style):
    ax.bar(x + col * width, data[:, col], width, color=color, label=label)

# NOTE(review): these limits were chosen for the scores computed with the
# swapped arguments; re-check that the corrected scores still fall inside them.
ax.set_ylim(0.49,0.75)
# Centre each tick under its group of four bars.
ax.set_xticks(x + width + width/2)
ax.set_xticklabels(x_labels)
ax.set_xlabel('Regression Model')
ax.set_title('Coefficient of Determination (R^2)')
ax.legend()
plt.grid(True, which='major', axis='y', ls='--', lw=.5, c='k', alpha=.3)

fig.tight_layout()
plt.show()