# Neural Network Training
This Notebook contains the code to train a Neural Network on the crop data.

The parameters in the model below are the ones that were decided upon after a parameter search. It should be noted, though, that compared to the ensemble methods, the NNs have a much larger search space over relevant parameters.

The Neural Network is restricted to the same number of nodes in each hidden layer to reduce the parameter search needed.

This code is simplified from the original script used, which had to iterate through validation sets, using the degree splitting technique. To simplify, this code only splits the data into training and test sets, and only trains one model.

The final model used in the predictions for the project was the Random Forest. Given more time, an exhaustive hyperparameter search would have been used to identify the best NN architecture more accurately.

In [None]:
import pandas as pd
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
import random
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard import SummaryWriter
import random
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Settings for the training run.
# Every column that is neither the target nor listed in `categorical_features`
# is treated as a continuous feature and is standardised before training.
read_file_path = 'data_set.csv'
target = 'maize_a_2010'
categorical_features = ['soil_types', 'climate_zones']

# Neural-network / optimisation settings
test_size = 0.2                    # fraction of rows held out as the test set
batch_size = 64                    # rows sampled per optimisation step
layer_size = 1000                  # units in each hidden layer
dropout = 0.05                     # dropout probability
lr = 1e-5                          # optimiser learning rate
adam_epsilon = 1.5e-4              # epsilon intended for the Adam optimiser
max_training_steps = 100_000       # total number of optimisation steps
training_step_iteration = 1000     # report R2 scores every this many steps
optimizer = torch.optim.Adam       # optimiser class; instantiated in the training cell
loss_func = torch.nn.MSELoss()

In [None]:
# Load the CSV at `read_file_path` into a pandas DataFrame
data = pd.read_csv(filepath_or_buffer=read_file_path)

In [None]:
def autograd_variable(input):
    """Return ``input`` on the GPU when CUDA is available, otherwise unchanged.

    ``torch.autograd.Variable`` has been deprecated since PyTorch 0.4: tensors
    track gradients themselves, so the tensor is no longer wrapped and only
    the device placement remains.

    Args:
        input: A ``torch.Tensor``. (The name shadows the ``input`` builtin but
            is kept so existing keyword callers keep working.)

    Returns:
        The tensor moved to the default CUDA device if one is available,
        else the tensor that was passed in.
    """
    if torch.cuda.is_available():
        return input.cuda()
    return input

class NN(nn.Module):
    """Fully connected regression network with five equally sized hidden layers.

    Every hidden layer is ``Linear -> Dropout -> ReLU`` with ``layer_size``
    units (module-level setting); a final ``Linear`` layer emits one value
    per input row.

    Args:
        feature_count: Number of input features per sample.
        dropout: Dropout probability used inside every hidden layer.
    """

    def __init__(self, feature_count, dropout=dropout):
        super().__init__()
        hidden_layer_count = 5
        layers = []
        in_features = feature_count
        for _ in range(hidden_layer_count):
            layers += [
                nn.Linear(in_features, layer_size),
                nn.Dropout(p=dropout),
                nn.ReLU(),
            ]
            in_features = layer_size
        # No dropout after the output layer: it would randomly zero (and
        # rescale) the regression output itself during training. Dropout has
        # no parameters, so the state-dict keys (fc.0 ... fc.15) are unchanged.
        layers.append(nn.Linear(layer_size, 1))
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Run the network on ``x`` and return one prediction per row."""
        # Follow the device the parameters live on instead of forcing CUDA,
        # so a model kept on the CPU still works on a machine with a GPU.
        model_device = next(self.parameters()).device
        return self.fc(x.to(model_device))

    def predict(self, x):
        """Return predictions for ``x`` without dropout or gradient tracking.

        The module is switched to eval mode for the call (so dropout is
        disabled and the output is deterministic) and its previous
        train/eval mode is restored afterwards.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                return self(x)
        finally:
            self.train(was_training)

In [None]:
# One hot encode categorical features
yields = data[[target]]
continuous_features_not_transformed = data.drop(columns=[target] + categorical_features)
# Only the continuous columns are standardised later; the one-hot columns are left as 0/1.
scaled_columns = list(continuous_features_not_transformed.columns)

# Encode every column listed in `categorical_features` (instead of hard-coding
# two of them). Each dummy column is prefixed with its source column name, so
# labels stay unique even if two categorical columns share category values.
categorical_dummies = pd.get_dummies(
    data[categorical_features], columns=categorical_features, dtype=float
)
data_with_encoded_categorical_data = pd.concat(
    [continuous_features_not_transformed, categorical_dummies], axis=1
)

In [None]:
# Hold out a `test_size` fraction of the rows as the test set; the remainder is the training set
X_train, X_test, y_train, y_test = train_test_split(
    data_with_encoded_categorical_data,
    yields,
    test_size=test_size,
)

In [None]:
# Standardise the target and the continuous feature columns.
# Both scalers are fitted on the training split only; the feature scaler is
# then re-used (transform only) on the test split.
scaler_feature, scaler_target = StandardScaler(), StandardScaler()

target_data = scaler_target.fit_transform(y_train)

# Scaled continuous columns first, followed by the untouched remaining columns
train_feature_data = np.hstack((
    scaler_feature.fit_transform(X_train[scaled_columns]),
    X_train.drop(columns=scaled_columns).values,
))
test_feature_data = np.hstack((
    scaler_feature.transform(X_test[scaled_columns]),
    X_test.drop(columns=scaled_columns).values,
))

In [None]:
# Create the model on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The input width is the number of columns of the prepared training matrix
# (`feature_data` was never defined; `train_feature_data` is the matrix built above).
live_model = NN(train_feature_data.shape[1], dropout=dropout).to(device)

In [None]:
# Main training loop
# The optimiser instance gets its own name so the `optimizer` class setting is
# not overwritten and this cell can be re-run. `adam_epsilon` is passed
# through; previously it was configured but never used.
model_optimizer = optimizer(live_model.parameters(), lr=lr, eps=adam_epsilon)

train_sample_count = train_feature_data.shape[0]
# The full train/test matrices do not change during training: convert them once.
train_features_tensor = torch.FloatTensor(train_feature_data)
test_features_tensor = torch.FloatTensor(test_feature_data)

for i in range(max_training_steps):
    # Sample a mini-batch of training rows (with replacement)
    batch_indices = np.random.randint(train_sample_count, size=batch_size)
    # Named `batch_*` so the global `target` column name is not clobbered
    batch_targets = torch.FloatTensor(target_data[batch_indices, :]).to(device)
    batch_features = torch.FloatTensor(train_feature_data[batch_indices, :]).to(device)
    # Call the module itself rather than .forward() so registered hooks run
    prediction = live_model(batch_features)
    loss = loss_func(prediction, batch_targets)
    model_optimizer.zero_grad()
    loss.backward()
    model_optimizer.step()
    if i % training_step_iteration == 0:
        # Evaluate with dropout disabled, then switch back to training mode
        live_model.eval()
        with torch.no_grad():
            test_prediction = live_model.predict(test_features_tensor).cpu()
            train_prediction = live_model.predict(train_features_tensor).cpu()
        live_model.train()

        # Undo the target standardisation so R2 is computed on the original scale
        train_yield_predicted = scaler_target.inverse_transform(train_prediction.numpy())
        test_yield_predicted = scaler_target.inverse_transform(test_prediction.numpy())
        print('Test Set R2 Score',
              r2_score(y_test, test_yield_predicted),
              'Train Set R2 Score',
              r2_score(y_train, train_yield_predicted))

In [None]:
# Persist the model's state dict to the file 'model_params'
trained_weights = live_model.state_dict()
torch.save(trained_weights, 'model_params')