# Preparing Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
#for model creation
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD

In [None]:
#for hyperparameter search
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the raw dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="cucumbers.csv")

In [None]:
#df.head()

In [None]:
#df.describe()

In [None]:
# Drop outlier rows: keep only records whose irrigation value is above -20000.
df = df[df["irrigation"] > -20000]
df.describe()

In [None]:
# Remove rows with missing values, then discard the "cycle" column.
df = df.dropna().drop(columns="cycle")
#df.isnull().sum()

In [None]:
# One-hot encode the categorical columns.
# NOTE(review): the second positional argument of get_dummies is `prefix`, so
# this encodes every object/category column using the prefix "country" rather
# than selecting only the "country" column -- confirm that
# `columns=["country"]` was not what was intended.
df_onehot = pd.get_dummies(data=df, prefix="country")
df_onehot.head(5)


In [None]:
# Split the encoded frame into model inputs (x) and the prediction target (y).
target_columns = ["yield"]
feature_columns = df_onehot.columns.drop(target_columns)

# x = input features, y = output targets
y = df_onehot[target_columns]
x = df_onehot[feature_columns]

# Number of input features; read later when the network's input layer is built.
n_features = len(feature_columns)

#x.head(5)
#y.head(5)

In [None]:
#Train Test Split
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
#Scaling
from sklearn.preprocessing import MinMaxScaler

# Learn each feature's min/max from the training split only, then apply that
# same scaling to both splits so no test-set statistics are used for fitting.
scaler = MinMaxScaler().fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
#x_test.min()
#x_test.max()
#x_train.min()
#x_train.max()

In [None]:
#n_features

# Grid Search

In [None]:
from tensorflow.keras import initializers
# NOTE(review): `seed` is assigned here (and again in the cell that creates the
# KerasRegressor), but nothing in the visible code passes it to an initializer,
# optimizer or random generator, and `initializers` is not referenced either --
# confirm whether seeding was meant to be wired in.
seed = 42

In [None]:
#model creation
def define_model(activation="relu", optimizer="Adam", neurons_hl1=32, neurons_hl2=10,
                 neurons_hl3=10, regulizer=0, learning_rate=0.001, hiddenlayers=4):
    """Build and compile a fully connected regression network.

    The network has an input layer sized by the module-level ``n_features``,
    up to three hidden ``Dense`` layers and a single linear output unit. It is
    compiled with the ``'mape'`` loss.

    Args:
        activation: Activation function used by every hidden layer.
        optimizer: ``"Adam"`` or ``"SGD"`` to build that optimizer with
            ``learning_rate``; any other value is handed to ``compile``
            unchanged, in which case ``learning_rate`` is not applied here.
        neurons_hl1: Units in the first hidden layer.
        neurons_hl2: Units in the second hidden layer.
        neurons_hl3: Units in the third hidden layer.
        regulizer: L2 kernel-regularisation factor for the hidden layers.
        learning_rate: Learning rate for the Adam/SGD optimizer.
        hiddenlayers: Number of hidden layers to add. Values above 3 add all
            three layers; values of 0 or below add none.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Imported locally so the function does not rely on a module-level `tf`
    # name, which this file never imports.
    from tensorflow.keras.regularizers import l2

    model = Sequential()

    # Input layer: the shape is given as a tuple, not a bare integer.
    model.add(Input(shape=(n_features,)))

    # Hidden layers: take the first `hiddenlayers` sizes. max(..., 0) keeps a
    # negative count from turning into a "drop from the end" slice.
    hidden_units = (neurons_hl1, neurons_hl2, neurons_hl3)
    for units in hidden_units[:max(hiddenlayers, 0)]:
        model.add(Dense(units=units, activation=activation,
                        kernel_initializer="he_normal",
                        kernel_regularizer=l2(regulizer)))

    # Resolve the optimizer name. `elif` matters: after the first branch
    # `optimizer` is an optimizer object, no longer a string.
    if optimizer == "Adam":
        optimizer = Adam(learning_rate=learning_rate)
    elif optimizer == "SGD":
        optimizer = SGD(learning_rate=learning_rate)

    # Output layer: one linear unit for the regression target.
    model.add(Dense(units=1, activation="linear"))

    model.compile(optimizer=optimizer, loss="mape")
    return model

In [None]:
from keras.wrappers.scikit_learn import KerasRegressor


In [None]:
# Training settings handed to the KerasRegressor wrapper.
epochs = 200  # other values noted by the author: 300, 600
batch_size = 32  # other value noted by the author: 64

In [None]:
seed = 42

# Wrap the Keras model factory so scikit-learn's search utilities can drive it.
model = KerasRegressor(
    build_fn=define_model,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [None]:
# Search space for the grid search. To keep the search fast, each list was
# narrowed to the best values from earlier searches; values that performed
# worse are noted in the trailing comments.
hiddenlayers = [3]  # hidden-layer counts previously tried: [1, 2, 3]

# Neuron counts per hidden layer.
# All neuron counts previously tried: [4, 8, 16, 32, 64, 128, 264, 552]
neurons_hl1 = [64, 252, 504]
neurons_hl2 = [8, 252, 504, 150]
neurons_hl3 = [16, 32, 64]

activation = ["relu"]  # previously also tried: sigmoid
optimizer = ["Adam"]  # previously also tried: SGD
learning_rate = [0.0045]  # previously also tried: 0.001
regulizer = [0, 0.01, 0.005]  # L2 regularisation factors

In [None]:
from sklearn.model_selection import cross_val_score  

In [None]:
# Map each define_model keyword argument to its list of candidate values.
param_grid = {
    "activation": activation,
    "optimizer": optimizer,
    "regulizer": regulizer,
    "learning_rate": learning_rate,
    "neurons_hl1": neurons_hl1,
    "neurons_hl2": neurons_hl2,
    "neurons_hl3": neurons_hl3,
    "hiddenlayers": hiddenlayers,
}

In [None]:
# Exhaustive search over param_grid with 5-fold cross-validation, scored by
# negated mean absolute percentage error, using all available cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring="neg_mean_absolute_percentage_error",
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [None]:
#sorted(sklearn.metrics.SCORERS.keys())

In [None]:
# Run the full grid search on the scaled training split.
grid_result = grid.fit(X=x_train, y=y_train)

In [None]:
# Report the best cross-validated score together with the parameters that produced it.
best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)

In [None]:
# Print the mean and standard deviation of the test score for every
# parameter combination that was evaluated.
cv_results = grid_result.cv_results_
means = cv_results["mean_test_score"]
stds = cv_results["std_test_score"]
params = cv_results["params"]
for mean, stdev, param in zip(means, stds, params):
    line = "Mean = %f (std=%f) with: %r" % (mean, stdev, param)
    print(line)

In [None]:
# Tabulate every grid-search result and show the ten best-scoring configurations.
allresults = pd.DataFrame(grid_result.cv_results_)
shown_columns = ["param_hiddenlayers", "param_neurons_hl1", "param_neurons_hl2", "mean_test_score"]
allresults.sort_values(by="mean_test_score", ascending=False)[shown_columns].head(10)