This dataset is part of the [Farming Systems Project](https://www.ars.usda.gov/northeast-area/beltsville-md-barc/beltsville-agricultural-research-center/sustainable-agricultural-systems-laboratory/docs/farming-systems-project/) at USDA, Beltsville MD.  This data is not available online on the USDA
 website but can be found on my [GitHub](https://github.com/mmtokay/DATA606/tree/master/dataset)
* Julian Day 
* Month
* Day
* Date
* avgtTempC - average temperature in C
* maxTempC - maximum temperature in C
* minTempC - minimum temperature in C
* maxHumPct - maximum humidity in %
* minHumPct - minimum humidity in %
* avgRadWm-2 - average radiation in W/m²
* meanWindMs-1 - mean wind in m/s
* PrecipitationMm - precipitation/snow melt in mm

# Model A

In [None]:
import io
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import warnings
import time
import pathlib
import seaborn as sns
import tensorflow as tf
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

from datetime import datetime, timedelta
from sklearn import linear_model
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression, RidgeClassifier
from sklearn.metrics import *
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, cross_val_predict, cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, RobustScaler, Normalizer, MinMaxScaler, StandardScaler, Binarizer
from sklearn.tree import DecisionTreeRegressor, export_graphviz
from sklearn.utils import shuffle
from time import time
from tensorflow import keras
from tensorflow.keras import layers

# Show which TensorFlow version this notebook is running against.
print(tf.__version__)
# Silence FutureWarning messages for the remainder of the notebook.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
def modelEvaluation(test_y, y_pred):
    """Print regression metrics and plot predictions against true values.

    Parameters
    ----------
    test_y : array-like
        True GrainYield values.
    y_pred : array-like
        Predicted GrainYield values, aligned element-wise with ``test_y``.

    Prints the mean absolute error, mean squared error, median absolute
    error and coefficient of determination, then draws a scatter plot of
    ``y_pred`` against ``test_y`` together with the identity line.
    """
    # Mean absolute error regression loss (Best is 0)
    mae = mean_absolute_error(test_y, y_pred)
    print("Mean absolute error regression loss (Best is 0) = {:.5f}".format(mae))

    # Mean squared error
    mse = mean_squared_error(test_y, y_pred)
    print("Mean squared error (Best is 0) = {:.5f}".format(mse))

    # Median absolute error regression loss (this is NOT a percentage error,
    # so the label no longer mentions MAPE)
    medae = median_absolute_error(test_y, y_pred)
    print("Median absolute error regression loss (Best is 0) = {:.5f}".format(medae))

    # Coefficient of determination (Best is 1).  r2_score is not symmetric:
    # the true values must be passed first, the predictions second.
    r2 = r2_score(test_y, y_pred)
    print("Coefficient of determination (Best is 1) = {:.5f}".format(r2))

    # Square axes so the identity line runs at 45 degrees.
    plt.axes(aspect='equal')
    plt.scatter(test_y, y_pred)
    plt.xlabel('True Values [GrainYield]')
    plt.ylabel('Predictions [GrainYield]')
    lims = [0, 13000]
    plt.xlim(lims)
    plt.ylim(lims)
    # Identity line: points on it are perfect predictions.
    plt.plot(lims, lims)


def errorPlot(test_y, y_pred):
    """Draw a 25-bin histogram of the residuals ``y_pred - test_y``."""
    residuals = y_pred - test_y
    plt.hist(residuals, bins=25)
    plt.xlabel("Prediction Error [GrainYield]")
    plt.ylabel("Count")

# Corn - 16 weeks

In [None]:
# Load the 16-week corn feature table and show its correlation matrix as a
# heat map.  Styler.set_precision was removed in pandas 2.0, so the
# two-decimal display is requested through Styler.format instead.
dataCorn16w = pd.read_csv('./dataset/cornFeatures16w.csv')
corrCorn16w = dataCorn16w.corr()
corrCorn16w.style.background_gradient(cmap='coolwarm').format("{:.2f}")

In [None]:
# Splitting data set: features and target go through a single call so the
# train/test rows of X and y are guaranteed to stay aligned.
train_16w_X, test_16w_X, train_16w_y, test_16w_y = train_test_split(
    dataCorn16w.drop('GrainYield', axis=1),
    dataCorn16w['GrainYield'],
    random_state=1,
)

# Apply Robust Scaler: fitted on the training split only, then reused on test.
scaler = RobustScaler()
train_scaler_16w_X = scaler.fit_transform(train_16w_X)
test_scaler_16w_X = scaler.transform(test_16w_X)

**Lasso**

In [None]:
# Lasso regression on the scaled 16-week training features.
lm = linear_model.Lasso(alpha=0.9).fit(train_scaler_16w_X, train_16w_y)
y_pred_16w_lm = lm.predict(test_scaler_16w_X)
modelEvaluation(test_16w_y, y_pred_16w_lm)

# Label each coefficient with its column name, sort descending, and show
# the five largest and the five smallest.
important_features = pd.Series(
    data=lm.coef_,
    index=dataCorn16w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 16-week Lasso predictions.
errorPlot(test_16w_y, y_pred_16w_lm)

**Decision Tree Regressor**

In [None]:
# Decision tree with default settings on the scaled 16-week training features.
tree_model = DecisionTreeRegressor().fit(train_scaler_16w_X, train_16w_y)
y_pred_16w_dtr = tree_model.predict(test_scaler_16w_X)
modelEvaluation(test_16w_y, y_pred_16w_dtr)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=tree_model.feature_importances_,
    index=dataCorn16w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 16-week decision-tree predictions.
errorPlot(test_16w_y, y_pred_16w_dtr)

**Random Forest Regressor**

In [None]:
# Random forest with default settings on the scaled 16-week training features.
rf_model = RandomForestRegressor().fit(train_scaler_16w_X, train_16w_y)
y_pred_16w_rfr = rf_model.predict(test_scaler_16w_X)
modelEvaluation(test_16w_y, y_pred_16w_rfr)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataCorn16w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 16-week default random-forest predictions.
errorPlot(test_16w_y, y_pred_16w_rfr)

In [None]:
# Random forest with an explicit tree count, depth limit and fixed seed.
max_depth = 30
n_est = 200
rf_model = RandomForestRegressor(
    n_estimators=n_est,
    max_depth=max_depth,
    random_state=0,
).fit(train_scaler_16w_X, train_16w_y)
y_pred_16w_rfrc = rf_model.predict(test_scaler_16w_X)
modelEvaluation(test_16w_y, y_pred_16w_rfrc)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataCorn16w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 16-week configured random-forest predictions.
errorPlot(test_16w_y, y_pred_16w_rfrc)

**Neural Network**

***Build the model***


Let's build our model. Here, we'll use a Sequential model with six densely connected hidden layers (with a Flatten layer between the third and fourth), and an output layer that returns a single, continuous value. The model building steps are wrapped in a function, build_model, since we'll create a second model, later on.

In [None]:
def build_model():
    """Create and compile the fully connected regression network.

    The input width is the number of columns in ``train_16w_X``.  The model
    is compiled with MSE loss, an RMSprop optimizer and MAE/MSE metrics.

    Returns
    -------
    The compiled ``keras.Sequential`` model.
    """
    n_inputs = len(train_16w_X.keys())

    net = keras.Sequential()
    net.add(layers.Dense(128, activation='relu', input_shape=[n_inputs]))
    for units in (64, 32):
        net.add(layers.Dense(units, activation='relu'))
    net.add(layers.Flatten())
    for units in (64, 32, 16):
        net.add(layers.Dense(units, activation='relu'))
    # Single linear output unit for the continuous target.
    net.add(layers.Dense(1))

    net.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9),
        metrics=['mae', 'mse'],
    )
    return net

These are the results that I got when trying different optimizers. The best one was RMSprop.


optimizer = tf.keras.optimizers.Adadelta(learning_rate=1.0, rho=0.9)

Mean absolute error regression loss (Best is 0) = 988.04268

Mean squared error (Best is 0) = 1607740.15541

Median absolute error regression loss or Mean absolute percentage error (Best is 0) = 759.51094

Coefficient of determination (Best is 1) = 0.82419


---
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)

Mean absolute error regression loss (Best is 0) = 995.84217

Mean squared error (Best is 0) = 1498847.28905

Median absolute error regression loss or Mean absolute percentage error (Best is 0) = 888.22495

Coefficient of determination (Best is 1) = 0.80990



---
optimizer = tf.keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)

Mean absolute error regression loss (Best is 0) = 1006.28192

Mean squared error (Best is 0) = 1647878.01326

Median absolute error regression loss or Mean absolute percentage error (Best is 0) = 811.88018

Coefficient of determination (Best is 1) = 0.77732



---

optimizer = tf.keras.optimizers.Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
Mean absolute error regression loss (Best is 0) = 982.63614

Mean squared error (Best is 0) = 1479295.82364

Median absolute error regression loss or Mean absolute percentage error (Best is 0) = 894.45721

Coefficient of determination (Best is 1) = 0.81385


---

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)

Mean absolute error regression loss (Best is 0) = 956.51279

Mean squared error (Best is 0) = 1532729.52835

Median absolute error regression loss or Mean absolute percentage error (Best is 0) = 744.18901

Coefficient of determination (Best is 1) = 0.82001

In [None]:
# Instantiate a freshly compiled network from build_model().
model = build_model()

***Inspect the model***

Use the .summary method to print a simple description of the model

In [None]:
# Print the description of the model built above.
model.summary()

***Train the model***

Train the model for 200 epochs, and record the training and validation metrics (loss, MAE and MSE) in the history object.

In [None]:
# Train on the scaled 16-week data; 20% of the training rows are held out
# for validation and per-epoch logging is replaced by EpochDots.
EPOCHS = 200
history = model.fit(
    train_scaler_16w_X,
    train_16w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)

In [None]:
# tensorflow_docs helper used by the next cells to draw the training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# Plot the 'mae' metric recorded in `history`, labelled 'Basic'.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [GrainYield]')

In [None]:
# Plot the 'mse' metric recorded in `history`, labelled 'Basic'.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [GrainYield^2]')

In [None]:
# Rebuild the network so this run starts from new weights.
model = build_model()

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
early_history = model.fit(
    train_scaler_16w_X,
    train_16w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()],
)

Let's see how well the model generalizes by using the test set, which we did not use when training the model. This tells us how well we can expect the model to predict when we use it in the real world.

In [None]:
# Evaluate on the held-out 16-week test split; the result is unpacked as the
# loss followed by the two metrics the model was compiled with ('mae', 'mse').
loss, mae, mse = model.evaluate(test_scaler_16w_X, test_16w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

***Make predictions***

Finally, predict GrainYield values using data in the testing set:

In [None]:
# Predict on the test split and flatten the network output to one dimension.
y_pred_16w_nn = model.predict(test_scaler_16w_X).flatten()

In [None]:
# Print metrics and draw the predicted-vs-true scatter for the 16-week network.
modelEvaluation(test_16w_y, y_pred_16w_nn)

It looks like our model predicts reasonably well. Let's take a look at the error distribution.

In [None]:
# Residual histogram for the 16-week neural-network predictions.
errorPlot(test_16w_y, y_pred_16w_nn)

# Corn - 15 weeks

In [None]:
# Load the 15-week corn feature table and show its correlation matrix as a
# heat map.  Styler.set_precision was removed in pandas 2.0, so the
# two-decimal display is requested through Styler.format instead.
dataCorn15w = pd.read_csv('./dataset/cornFeatures15w.csv')
corrCorn15w = dataCorn15w.corr()
corrCorn15w.style.background_gradient(cmap='coolwarm').format("{:.2f}")

In [None]:
# Splitting data set: features and target go through a single call so the
# train/test rows of X and y are guaranteed to stay aligned.
train_15w_X, test_15w_X, train_15w_y, test_15w_y = train_test_split(
    dataCorn15w.drop('GrainYield', axis=1),
    dataCorn15w['GrainYield'],
    random_state=1,
)

# Apply Robust Scaler: fitted on the training split only, then reused on test.
scaler = RobustScaler()
train_scaler_15w_X = scaler.fit_transform(train_15w_X)
test_scaler_15w_X = scaler.transform(test_15w_X)

**Lasso**

In [None]:
# Lasso regression on the scaled 15-week training features.
lm = linear_model.Lasso(alpha=0.8).fit(train_scaler_15w_X, train_15w_y)
y_pred_15w_lm = lm.predict(test_scaler_15w_X)
modelEvaluation(test_15w_y, y_pred_15w_lm)

# Label each coefficient with its column name, sort descending, and show
# the five largest and the five smallest.
important_features = pd.Series(
    data=lm.coef_,
    index=dataCorn15w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 15-week Lasso predictions.
errorPlot(test_15w_y, y_pred_15w_lm)

**Decision Tree Regressor**

In [None]:
# Decision tree with default settings on the scaled 15-week training features.
tree_model = DecisionTreeRegressor().fit(train_scaler_15w_X, train_15w_y)
y_pred_15w_dtr = tree_model.predict(test_scaler_15w_X)
modelEvaluation(test_15w_y, y_pred_15w_dtr)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=tree_model.feature_importances_,
    index=dataCorn15w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 15-week decision-tree predictions.
errorPlot(test_15w_y, y_pred_15w_dtr)

**Random Forest Regressor**

In [None]:
# Random forest with default settings on the scaled 15-week training features.
rf_model = RandomForestRegressor().fit(train_scaler_15w_X, train_15w_y)
y_pred_15w_rfr = rf_model.predict(test_scaler_15w_X)
modelEvaluation(test_15w_y, y_pred_15w_rfr)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataCorn15w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 15-week default random-forest predictions.
errorPlot(test_15w_y, y_pred_15w_rfr)

In [None]:
# Random forest with an explicit tree count, depth limit and fixed seed.
max_depth = 20
n_est = 200
rf_model = RandomForestRegressor(
    n_estimators=n_est,
    max_depth=max_depth,
    random_state=0,
).fit(train_scaler_15w_X, train_15w_y)
y_pred_15w_rfrc = rf_model.predict(test_scaler_15w_X)
modelEvaluation(test_15w_y, y_pred_15w_rfrc)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataCorn15w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 15-week configured random-forest predictions.
errorPlot(test_15w_y, y_pred_15w_rfrc)

**Neural Network**

***Build the model***


Let's build our model. Here, we'll use a Sequential model with six densely connected hidden layers (with a Flatten layer between the third and fourth), and an output layer that returns a single, continuous value. The model building steps are wrapped in a function, build_model, since we'll create a second model, later on.

In [None]:
def build_model():
    """Create and compile the fully connected regression network.

    The input width is the number of columns in ``train_15w_X``.  The model
    is compiled with MSE loss, an RMSprop optimizer and MAE/MSE metrics.

    Returns
    -------
    The compiled ``keras.Sequential`` model.
    """
    n_inputs = len(train_15w_X.keys())

    net = keras.Sequential()
    net.add(layers.Dense(128, activation='relu', input_shape=[n_inputs]))
    for units in (64, 32):
        net.add(layers.Dense(units, activation='relu'))
    net.add(layers.Flatten())
    for units in (64, 32, 16):
        net.add(layers.Dense(units, activation='relu'))
    # Single linear output unit for the continuous target.
    net.add(layers.Dense(1))

    net.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
        metrics=['mae', 'mse'],
    )
    return net

In [None]:
# Instantiate a freshly compiled network from build_model().
model = build_model()

***Inspect the model***

Use the .summary method to print a simple description of the model

In [None]:
# Print the description of the model built above.
model.summary()

***Train the model***

Train the model for 200 epochs, and record the training and validation metrics (loss, MAE and MSE) in the history object.

In [None]:
# Train on the scaled 15-week data; 20% of the training rows are held out
# for validation and per-epoch logging is replaced by EpochDots.
EPOCHS = 200
history = model.fit(
    train_scaler_15w_X,
    train_15w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)

In [None]:
# tensorflow_docs helper used by the next cells to draw the training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# Plot the 'mae' metric recorded in `history`, labelled 'Basic'.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [GrainYield]')

In [None]:
# Plot the 'mse' metric recorded in `history`, labelled 'Basic'.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [GrainYield^2]')

In [None]:
# Rebuild the network so this run starts from new weights.
model = build_model()

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
early_history = model.fit(
    train_scaler_15w_X,
    train_15w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()],
)

In [None]:
# Evaluate on the held-out 15-week test split; the result is unpacked as the
# loss followed by the two metrics the model was compiled with ('mae', 'mse').
loss, mae, mse = model.evaluate(test_scaler_15w_X, test_15w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

In [None]:
# Predict on the test split, flatten the network output to one dimension,
# then print metrics and draw the predicted-vs-true scatter.
y_pred_15w_nn = model.predict(test_scaler_15w_X).flatten()
modelEvaluation(test_15w_y, y_pred_15w_nn)

In [None]:
# Residual histogram for the 15-week neural-network predictions.
errorPlot(test_15w_y, y_pred_15w_nn)

# Corn - 14 weeks

In [None]:
# Load the 14-week corn feature table and show its correlation matrix as a
# heat map.  Styler.set_precision was removed in pandas 2.0, so the
# two-decimal display is requested through Styler.format instead.
dataCorn14w = pd.read_csv('./dataset/cornFeatures14w.csv')
corrCorn14w = dataCorn14w.corr()
corrCorn14w.style.background_gradient(cmap='coolwarm').format("{:.2f}")

In [None]:
# Splitting data set: features and target go through a single call so the
# train/test rows of X and y are guaranteed to stay aligned.
train_14w_X, test_14w_X, train_14w_y, test_14w_y = train_test_split(
    dataCorn14w.drop('GrainYield', axis=1),
    dataCorn14w['GrainYield'],
    random_state=1,
)

# Apply Robust Scaler: fitted on the training split only, then reused on test.
scaler = RobustScaler()
train_scaler_14w_X = scaler.fit_transform(train_14w_X)
test_scaler_14w_X = scaler.transform(test_14w_X)

**Lasso**

In [None]:
# Lasso regression on the scaled 14-week training features.
lm = linear_model.Lasso(alpha=0.9).fit(train_scaler_14w_X, train_14w_y)
y_pred_14w_lm = lm.predict(test_scaler_14w_X)
modelEvaluation(test_14w_y, y_pred_14w_lm)

# Label each coefficient with its column name, sort descending, and show
# the five largest and the five smallest.
important_features = pd.Series(
    data=lm.coef_,
    index=dataCorn14w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 14-week Lasso predictions.
errorPlot(test_14w_y, y_pred_14w_lm)

**Decision Tree Regressor**

In [None]:
# Decision tree with default settings on the scaled 14-week training features.
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_14w_X,train_14w_y)
y_pred_14w_dtr = tree_model.predict(test_scaler_14w_X)
modelEvaluation(test_14w_y, y_pred_14w_dtr)

# The tree's importances must be bound to `important_features` (the name
# that is sorted and printed below); otherwise the series left over from
# the previous model cell would be shown instead.
important_features = pd.Series(data=tree_model.feature_importances_,index=dataCorn14w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:5])
print(important_features[-5:])

In [None]:
# Residual histogram for the 14-week decision-tree predictions.
errorPlot(test_14w_y, y_pred_14w_dtr)

**Random Forest Regressor**

In [None]:
# Random forest with default settings on the scaled 14-week training features.
rf_model = RandomForestRegressor().fit(train_scaler_14w_X, train_14w_y)
y_pred_14w_rfr = rf_model.predict(test_scaler_14w_X)
modelEvaluation(test_14w_y, y_pred_14w_rfr)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataCorn14w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Random forest with an explicit tree count, depth limit and fixed seed.
max_depth = 20
n_est = 190
rf_model = RandomForestRegressor(
    n_estimators=n_est,
    max_depth=max_depth,
    random_state=0,
).fit(train_scaler_14w_X, train_14w_y)
y_pred_14w_rfrc = rf_model.predict(test_scaler_14w_X)
modelEvaluation(test_14w_y, y_pred_14w_rfrc)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataCorn14w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 14-week configured random-forest predictions.
errorPlot(test_14w_y, y_pred_14w_rfrc)

**Neural Network**

***Build the model***


Let's build our model. Here, we'll use a Sequential model with six densely connected hidden layers (with a Flatten layer between the third and fourth), and an output layer that returns a single, continuous value. The model building steps are wrapped in a function, build_model, since we'll create a second model, later on.

In [None]:
def build_model():
    """Create and compile the fully connected regression network.

    The input width is the number of columns in ``train_14w_X``.  The model
    is compiled with MSE loss, an RMSprop optimizer and MAE/MSE metrics.

    Returns
    -------
    The compiled ``keras.Sequential`` model.
    """
    n_inputs = len(train_14w_X.keys())

    net = keras.Sequential()
    net.add(layers.Dense(128, activation='relu', input_shape=[n_inputs]))
    for units in (64, 32):
        net.add(layers.Dense(units, activation='relu'))
    net.add(layers.Flatten())
    for units in (64, 32, 16):
        net.add(layers.Dense(units, activation='relu'))
    # Single linear output unit for the continuous target.
    net.add(layers.Dense(1))

    net.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
        metrics=['mae', 'mse'],
    )
    return net

In [None]:
# Instantiate a freshly compiled network from build_model().
model = build_model()

***Inspect the model***

Use the .summary method to print a simple description of the model

In [None]:
# Print the description of the model built above.
model.summary()

***Train the model***

Train the model for 200 epochs, and record the training and validation metrics (loss, MAE and MSE) in the history object.

In [None]:
# Train on the scaled 14-week data; 20% of the training rows are held out
# for validation and per-epoch logging is replaced by EpochDots.
EPOCHS = 200
history = model.fit(
    train_scaler_14w_X,
    train_14w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)

In [None]:
# tensorflow_docs helper used by the next cells to draw the training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# Plot the 'mae' metric recorded in `history`, labelled 'Basic'.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [GrainYield]')

In [None]:
# Plot the 'mse' metric recorded in `history`, labelled 'Basic'.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [GrainYield^2]')

In [None]:
# Rebuild the network so this run starts from new weights.
model = build_model()

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)
early_history = model.fit(
    train_scaler_14w_X,
    train_14w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()],
)

In [None]:
# Evaluate on the held-out 14-week test split; the result is unpacked as the
# loss followed by the two metrics the model was compiled with ('mae', 'mse').
loss, mae, mse = model.evaluate(test_scaler_14w_X, test_14w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

In [None]:
# Predict on the test split, flatten the network output to one dimension,
# then print metrics and draw the predicted-vs-true scatter.
y_pred_14w_nn = model.predict(test_scaler_14w_X).flatten()
modelEvaluation(test_14w_y, y_pred_14w_nn)

In [None]:
# Residual histogram for the 14-week neural-network predictions.
errorPlot(test_14w_y, y_pred_14w_nn)

# Corn - 13 weeks

In [None]:
# Load the 13-week corn feature table and show its correlation matrix as a
# heat map.  Styler.set_precision was removed in pandas 2.0, so the
# two-decimal display is requested through Styler.format instead.
dataCorn13w = pd.read_csv('./dataset/cornFeatures13w.csv')
corrCorn13w = dataCorn13w.corr()
corrCorn13w.style.background_gradient(cmap='coolwarm').format("{:.2f}")

In [None]:
# Splitting data set: features and target go through a single call so the
# train/test rows of X and y are guaranteed to stay aligned.
train_13w_X, test_13w_X, train_13w_y, test_13w_y = train_test_split(
    dataCorn13w.drop('GrainYield', axis=1),
    dataCorn13w['GrainYield'],
    random_state=1,
)

# Apply Robust Scaler: fitted on the training split only, then reused on test.
scaler = RobustScaler()
train_scaler_13w_X = scaler.fit_transform(train_13w_X)
test_scaler_13w_X = scaler.transform(test_13w_X)

**Lasso**

In [None]:
# Lasso regression on the scaled 13-week training features.
lm = linear_model.Lasso(alpha=0.9).fit(train_scaler_13w_X, train_13w_y)
y_pred_13w_lm = lm.predict(test_scaler_13w_X)
modelEvaluation(test_13w_y, y_pred_13w_lm)

# Label each coefficient with its column name, sort descending, and show
# the five largest and the five smallest.
important_features = pd.Series(
    data=lm.coef_,
    index=dataCorn13w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 13-week Lasso predictions.
errorPlot(test_13w_y, y_pred_13w_lm)

**Decision Tree Regressor**

In [None]:
# Decision tree with default settings on the scaled 13-week training features.
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_13w_X,train_13w_y)
y_pred_13w_dtr = tree_model.predict(test_scaler_13w_X)
modelEvaluation(test_13w_y, y_pred_13w_dtr)

# The tree's importances must be bound to `important_features` (the name
# that is sorted and printed below); otherwise the series left over from
# the previous model cell would be shown instead.
important_features = pd.Series(data=tree_model.feature_importances_,index=dataCorn13w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:5])
print(important_features[-5:])

In [None]:
# Residual histogram for the 13-week decision-tree predictions.
errorPlot(test_13w_y, y_pred_13w_dtr)

**Random Forest Regressor**

In [None]:
# Random forest with default settings on the scaled 13-week training features.
rf_model = RandomForestRegressor().fit(train_scaler_13w_X, train_13w_y)
y_pred_13w_rfr = rf_model.predict(test_scaler_13w_X)
modelEvaluation(test_13w_y, y_pred_13w_rfr)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataCorn13w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Random forest with an explicit tree count, depth limit and fixed seed.
max_depth = 20
n_est = 190
rf_model = RandomForestRegressor(
    n_estimators=n_est,
    max_depth=max_depth,
    random_state=0,
).fit(train_scaler_13w_X, train_13w_y)
y_pred_13w_rfrc = rf_model.predict(test_scaler_13w_X)
modelEvaluation(test_13w_y, y_pred_13w_rfrc)

# Label each feature importance with its column name, sort descending, and
# show the five largest and the five smallest.
important_features = pd.Series(
    data=rf_model.feature_importances_,
    index=dataCorn13w.drop('GrainYield', axis=1).columns,
).sort_values(ascending=False)
print(important_features.head(5))
print(important_features.tail(5))

In [None]:
# Residual histogram for the 13-week configured random-forest predictions.
errorPlot(test_13w_y, y_pred_13w_rfrc)

**Neural Network**

***Build the model***


Let's build our model. Here, we'll use a Sequential model with six densely connected hidden layers (with a Flatten layer between the third and fourth), and an output layer that returns a single, continuous value. The model building steps are wrapped in a function, build_model, since we'll create a second model, later on.

In [None]:
def build_model():
    """Create and compile the fully connected regression network.

    The input width is the number of columns in ``train_13w_X``.  The model
    is compiled with MSE loss, an RMSprop optimizer and MAE/MSE metrics.

    Returns
    -------
    The compiled ``keras.Sequential`` model.
    """
    n_inputs = len(train_13w_X.keys())

    net = keras.Sequential()
    net.add(layers.Dense(128, activation='relu', input_shape=[n_inputs]))
    for units in (64, 32):
        net.add(layers.Dense(units, activation='relu'))
    net.add(layers.Flatten())
    for units in (64, 32, 16):
        net.add(layers.Dense(units, activation='relu'))
    # Single linear output unit for the continuous target.
    net.add(layers.Dense(1))

    net.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
        metrics=['mae', 'mse'],
    )
    return net

In [None]:
# Instantiate a freshly compiled network from build_model().
model = build_model()

***Inspect the model***

Use the .summary method to print a simple description of the model

In [None]:
# Print the description of the model built above.
model.summary()

***Train the model***

Train the model for 200 epochs, and record the training and validation metrics (loss, MAE and MSE) in the history object.

In [None]:
# Train on the scaled 13-week data; 20% of the training rows are held out
# for validation and per-epoch logging is replaced by EpochDots.
EPOCHS = 200
history = model.fit(
    train_scaler_13w_X,
    train_13w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()],
)

In [None]:
# Evaluate the model trained for the full EPOCHS run on the 13-week test
# split; the result is unpacked as the loss followed by the 'mae' and 'mse'
# metrics the model was compiled with.
loss, mae, mse = model.evaluate(test_scaler_13w_X, test_13w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

In [None]:
# tensorflow_docs helper used by the next cells to draw the training history.
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
# Plot the 'mae' metric recorded in `history`, labelled 'Basic'.
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [GrainYield]')

In [None]:
# Plot the 'mse' metric recorded in `history`, labelled 'Basic'.
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [GrainYield^2]')

In [None]:
# Rebuild the network so this run starts from new weights.
model = build_model()

# The patience parameter is the amount of epochs to check for improvement
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)
early_history = model.fit(
    train_scaler_13w_X,
    train_13w_y,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, tfdocs.modeling.EpochDots()],
)

In [None]:
# Evaluate on the held-out 13-week test split; the result is unpacked as the
# loss followed by the two metrics the model was compiled with ('mae', 'mse').
loss, mae, mse = model.evaluate(test_scaler_13w_X, test_13w_y, verbose=2)
print("Testing set Mean Abs Error: {:5.2f} GrainYield".format(mae))

In [None]:
# Predict on the test split, flatten the network output to one dimension,
# then print metrics and draw the predicted-vs-true scatter.
y_pred_13w_nn = model.predict(test_scaler_13w_X).flatten()
modelEvaluation(test_13w_y, y_pred_13w_nn)

In [None]:
# Residual histogram for the 13-week neural-network predictions.
errorPlot(test_13w_y, y_pred_13w_nn)

In [None]:
# Median absolute error of every model (rows) for every dataset (columns:
# 13, 14, 15 and 16 weeks).
test_sets = [test_13w_y, test_14w_y, test_15w_y, test_16w_y]
predictions = [
    [y_pred_13w_lm, y_pred_14w_lm, y_pred_15w_lm, y_pred_16w_lm],
    [y_pred_13w_dtr, y_pred_14w_dtr, y_pred_15w_dtr, y_pred_16w_dtr],
    [y_pred_13w_rfr, y_pred_14w_rfr, y_pred_15w_rfr, y_pred_16w_rfr],
    [y_pred_13w_rfrc, y_pred_14w_rfrc, y_pred_15w_rfrc, y_pred_16w_rfrc],
    [y_pred_13w_nn, y_pred_14w_nn, y_pred_15w_nn, y_pred_16w_nn],
]
data = np.array([[median_absolute_error(y_true, y_hat)
                  for y_true, y_hat in zip(test_sets, row)]
                 for row in predictions])
length = len(data)
x_labels = ['Lasso', 'Decision Tree', 'Random Forest', 'Random Forest C', 'Neural Network']

# Set plot parameters
fig, ax = plt.subplots()
fig.set_size_inches(7,5)

width = 0.2 # width of bar
x = np.arange(length)

# One bar per dataset inside each model group.
week_labels = ['13 weeks', '14 weeks', '15 weeks', '16 weeks']
week_colors = ['#000080', '#0F52BA', '#6593F5', '#93b4fa']
for col, (week_label, week_color) in enumerate(zip(week_labels, week_colors)):
    ax.bar(x + col * width, data[:, col], width, color=week_color, label=week_label)

# Zoom the y-axis on the observed values.  Several of the models are not
# seeded, so fixed limits would clip bars whenever a run lands outside them.
span = data.max() - data.min()
pad = 0.1 * span if span > 0 else 1.0
ax.set_ylim(data.min() - pad, data.max() + pad)
# Centre each tick under its group of four bars.
ax.set_xticks(x + width + width/2)
ax.set_xticklabels(x_labels)
ax.set_ylabel('Grain Yield (kg/ha)')
ax.set_xlabel('Regression Model')
# The plotted metric is median_absolute_error, not a percentage error.
ax.set_title('Median Absolute Error')
ax.legend()
plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)

fig.tight_layout()
plt.show()

In [None]:
# Coefficient of determination of every model (rows) for every dataset
# (columns: 13, 14, 15 and 16 weeks).  r2_score is not symmetric: the true
# values are passed first and the predictions second.
test_sets = [test_13w_y, test_14w_y, test_15w_y, test_16w_y]
predictions = [
    [y_pred_13w_lm, y_pred_14w_lm, y_pred_15w_lm, y_pred_16w_lm],
    [y_pred_13w_dtr, y_pred_14w_dtr, y_pred_15w_dtr, y_pred_16w_dtr],
    [y_pred_13w_rfr, y_pred_14w_rfr, y_pred_15w_rfr, y_pred_16w_rfr],
    [y_pred_13w_rfrc, y_pred_14w_rfrc, y_pred_15w_rfrc, y_pred_16w_rfrc],
    [y_pred_13w_nn, y_pred_14w_nn, y_pred_15w_nn, y_pred_16w_nn],
]
data = np.array([[r2_score(y_true, y_hat)
                  for y_true, y_hat in zip(test_sets, row)]
                 for row in predictions])
length = len(data)
x_labels = ['Lasso', 'Decision Tree', 'Random Forest', 'Random Forest C', 'Neural Network']

# Set plot parameters
fig, ax = plt.subplots()
fig.set_size_inches(7,5)
width = 0.2 # width of bar
x = np.arange(length)

# One bar per dataset inside each model group.
week_labels = ['13 weeks', '14 weeks', '15 weeks', '16 weeks']
week_colors = ['#000080', '#0F52BA', '#6593F5', '#93b4fa']
for col, (week_label, week_color) in enumerate(zip(week_labels, week_colors)):
    ax.bar(x + col * width, data[:, col], width, color=week_color, label=week_label)

# Zoom the y-axis on the observed values.  Several of the models are not
# seeded, so fixed limits would clip bars whenever a run lands outside them.
span = data.max() - data.min()
pad = 0.1 * span if span > 0 else 0.01
ax.set_ylim(data.min() - pad, data.max() + pad)
# Centre each tick under its group of four bars.
ax.set_xticks(x + width + width/2)
ax.set_xticklabels(x_labels)
ax.set_xlabel('Regression Model')
ax.set_title('Coefficient of Determination (R^2)')
ax.legend()
plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)

fig.tight_layout()
plt.show()