# 12. Neural network

In [1]:
import numpy as np
from PIL import Image
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import warnings
import math
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import seaborn as sns
from tensorflow.keras import Sequential, initializers, optimizers
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import FunctionTransformer

In [2]:
#mute future warning
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
#load metadata
data = pd.read_csv("data/metadata/selection_sharp.csv")
path = data["crop_storage"]

## First classification pool

In [4]:
#define sets (add header=None for labels since there is no column name and turn to Series)
X_tr = pd.read_csv("data/features/pool1/X_tr.csv", index_col=0)
y_tr = pd.Series(pd.read_csv("data/features/pool1/y_tr.csv", header=None, index_col=0)[1])
X_va = pd.read_csv("data/features/pool1/X_va.csv", index_col=0)
y_va = pd.Series(pd.read_csv("data/features/pool1/y_va.csv", header=None, index_col=0)[1])
X_te = pd.read_csv("data/features/pool1/X_te.csv", index_col=0)
y_te = pd.Series(pd.read_csv("data/features/pool1/y_te.csv", header=None, index_col=0)[1])

In [5]:
#define statistical data type of columns 
cat_columns = ['Month']
num_columns = list(range(300))

#define categorical transformer
cat_transformer = OneHotEncoder(sparse=False)

#define the log transformer
log_transformer = FunctionTransformer(np.log1p)

#define the column transformer
preprocessor = ColumnTransformer([
('categorical', cat_transformer, cat_columns),
("log", log_transformer, ["altitude"]),    
("scale_log", StandardScaler(), ["altitude"])
], remainder='passthrough')

In [6]:
#fit the train data and transform
X_tr = preprocessor.fit_transform(X_tr)

#transform the validation and test data
X_va = preprocessor.transform(X_va)
X_te = preprocessor.transform(X_te)

In [7]:
#turn labels to float
#define classes
classes = y_tr.unique()

#create an ordinal transformer indicating the 6 classes as categories
ordinal_transformer = OrdinalEncoder(categories=[classes])

#turn the labels to float
y_tr_float = ordinal_transformer.fit_transform(y_tr[:,np.newaxis]).reshape(y_tr.shape[0],)
y_va_float = ordinal_transformer.fit_transform(y_va[:,np.newaxis]).reshape(y_va.shape[0],)
y_te_float = ordinal_transformer.fit_transform(y_te[:,np.newaxis]).reshape(y_te.shape[0],)

In [8]:
#define number of features
n_feat = X_tr.shape[1]

#create neural network with one hidden layer
nn1 = Sequential()
#add hidden layer
nn1.add(Dense(20, activation="relu", input_dim=n_feat, kernel_initializer=initializers.VarianceScaling(scale=2.0, seed=0)))
#add output layer
nn1.add(Dense(6, activation="softmax", kernel_initializer=initializers.VarianceScaling(scale=1.0, seed=0)))
# Print network summary
nn1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                6340      
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 126       
Total params: 6,466
Trainable params: 6,466
Non-trainable params: 0
_________________________________________________________________


In [9]:
#compile model with a stochastic gradient descent as optimizer, crossentropy as loss function and accuracy as metrics
nn1.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [10]:
#fit the neural network
history_nn1 = nn1.fit(
x=X_tr, y=y_tr_float,
validation_data=(X_va, y_va_float), batch_size=32, epochs=50,
shuffle=True
)

Train on 664 samples, validate on 285 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
#compute test accuracy
(nn1_te_loss1, nn1_te_accuracy1) = nn1.evaluate(X_te, y_te_float, batch_size=32)
nn1_te_accuracy1



0.9327731092436975

## Second classification pool

In [12]:
#define sets (add header=None for labels since there is no column name and turn to Series)
X_tr = pd.read_csv("data/features/pool2/X_tr.csv", index_col=0)
y_tr = pd.Series(pd.read_csv("data/features/pool2/y_tr.csv", header=None, index_col=0)[1])
X_va = pd.read_csv("data/features/pool2/X_va.csv", index_col=0)
y_va = pd.Series(pd.read_csv("data/features/pool2/y_va.csv", header=None, index_col=0)[1])
X_te = pd.read_csv("data/features/pool2/X_te.csv", index_col=0)
y_te = pd.Series(pd.read_csv("data/features/pool2/y_te.csv", header=None, index_col=0)[1])

In [13]:
#define statistical data type of columns 
cat_columns = ['Month']
num_columns = list(range(300))

#define categorical transformer
cat_transformer = OneHotEncoder(sparse=False)

#define the log transformer
log_transformer = FunctionTransformer(np.log1p)

#define the column transformer
preprocessor = ColumnTransformer([
('categorical', cat_transformer, cat_columns),
("log", log_transformer, ["altitude"]),    
("scale_log", StandardScaler(), ["altitude"])
], remainder='passthrough')

In [14]:
#fit the train data and transform
X_tr = preprocessor.fit_transform(X_tr)

#transform the validation and test data
X_va = preprocessor.transform(X_va)
X_te = preprocessor.transform(X_te)

In [15]:
#turn labels to float
#define classes
classes = y_tr.unique()

#create an ordinal transformer indicating the 6 classes as categories
ordinal_transformer = OrdinalEncoder(categories=[classes])

#turn the labels to float
y_tr_float = ordinal_transformer.fit_transform(y_tr[:,np.newaxis]).reshape(y_tr.shape[0],)
y_va_float = ordinal_transformer.fit_transform(y_va[:,np.newaxis]).reshape(y_va.shape[0],)
y_te_float = ordinal_transformer.fit_transform(y_te[:,np.newaxis]).reshape(y_te.shape[0],)

In [16]:
#define number of features
n_feat = X_tr.shape[1]

#create neural network with one hidden layer
nn1 = Sequential()
#add hidden layer
nn1.add(Dense(20, activation="relu", input_dim=n_feat, kernel_initializer=initializers.VarianceScaling(scale=2.0, seed=0)))
#add output layer
nn1.add(Dense(6, activation="softmax", kernel_initializer=initializers.VarianceScaling(scale=1.0, seed=0)))
# Print network summary
nn1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 20)                6340      
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 126       
Total params: 6,466
Trainable params: 6,466
Non-trainable params: 0
_________________________________________________________________


In [17]:
#compile model with a stochastic gradient descent as optimizer, crossentropy as loss function and accuracy as metrics
nn1.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [18]:
#fit the neural network
history_nn1 = nn1.fit(
x=X_tr, y=y_tr_float,
validation_data=(X_va, y_va_float), batch_size=32, epochs=50,
shuffle=True
)

Train on 575 samples, validate on 247 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
#compute test accuracy
(nn1_te_loss2, nn1_te_accuracy2) = nn1.evaluate(X_te, y_te_float, batch_size=32)
nn1_te_accuracy2



0.9223300959300069

## First classication pool balanced

In [20]:
#define sets (add header=None for labels since there is no column name and turn to Series)
X_tr = pd.read_csv("data/features/pool1_bal/X_tr.csv", index_col=0)
y_tr = pd.Series(pd.read_csv("data/features/pool1_bal/y_tr.csv", header=None, index_col=0)[1])
X_va = pd.read_csv("data/features/pool1_bal/X_va.csv", index_col=0)
y_va = pd.Series(pd.read_csv("data/features/pool1_bal/y_va.csv", header=None, index_col=0)[1])
X_te = pd.read_csv("data/features/pool1_bal/X_te.csv", index_col=0)
y_te = pd.Series(pd.read_csv("data/features/pool1_bal/y_te.csv", header=None, index_col=0)[1])

In [21]:
#define statistical data type of columns 
cat_columns = ['Month']
num_columns = list(range(300))

#define categorical transformer
cat_transformer = OneHotEncoder(sparse=False)

#define the log transformer
log_transformer = FunctionTransformer(np.log1p)

#define the column transformer
preprocessor = ColumnTransformer([
('categorical', cat_transformer, cat_columns),
("log", log_transformer, ["altitude"]),    
("scale_log", StandardScaler(), ["altitude"])
], remainder='passthrough')

In [22]:
#fit the train data and transform
X_tr = preprocessor.fit_transform(X_tr)

#transform the validation and test data
X_va = preprocessor.transform(X_va)
X_te = preprocessor.transform(X_te)

In [23]:
#turn labels to float
#define classes
classes = y_tr.unique()

#create an ordinal transformer indicating the 6 classes as categories
ordinal_transformer = OrdinalEncoder(categories=[classes])

#turn the labels to float
y_tr_float = ordinal_transformer.fit_transform(y_tr[:,np.newaxis]).reshape(y_tr.shape[0],)
y_va_float = ordinal_transformer.fit_transform(y_va[:,np.newaxis]).reshape(y_va.shape[0],)
y_te_float = ordinal_transformer.fit_transform(y_te[:,np.newaxis]).reshape(y_te.shape[0],)

In [24]:
#define number of features
n_feat = X_tr.shape[1]

#create neural network with one hidden layer
nn1 = Sequential()
#add hidden layer
nn1.add(Dense(20, activation="relu", input_dim=n_feat, kernel_initializer=initializers.VarianceScaling(scale=2.0, seed=0)))
#add output layer
nn1.add(Dense(6, activation="softmax", kernel_initializer=initializers.VarianceScaling(scale=1.0, seed=0)))
# Print network summary
nn1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 20)                6340      
_________________________________________________________________
dense_5 (Dense)              (None, 6)                 126       
Total params: 6,466
Trainable params: 6,466
Non-trainable params: 0
_________________________________________________________________


In [25]:
#compile model with a stochastic gradient descent as optimizer, crossentropy as loss function and accuracy as metrics
nn1.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [26]:
#fit the neural network
history_nn1 = nn1.fit(
x=X_tr, y=y_tr_float,
validation_data=(X_va, y_va_float), batch_size=32, epochs=50,
shuffle=True
)

Train on 523 samples, validate on 225 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [27]:
#compute test accuracy
(nn1_te_loss1_bal, nn1_te_accuracy1_bal) = nn1.evaluate(X_te, y_te_float, batch_size=32)
nn1_te_accuracy1_bal



0.9358288747741577

## Second classification pool balanced

In [28]:
#define sets (add header=None for labels since there is no column name and turn to Series)
X_tr = pd.read_csv("data/features/pool2_bal/X_tr.csv", index_col=0)
y_tr = pd.Series(pd.read_csv("data/features/pool2_bal/y_tr.csv", header=None, index_col=0)[1])
X_va = pd.read_csv("data/features/pool2_bal/X_va.csv", index_col=0)
y_va = pd.Series(pd.read_csv("data/features/pool2_bal/y_va.csv", header=None, index_col=0)[1])
X_te = pd.read_csv("data/features/pool2_bal/X_te.csv", index_col=0)
y_te = pd.Series(pd.read_csv("data/features/pool2_bal/y_te.csv", header=None, index_col=0)[1])

In [29]:
#define statistical data type of columns 
cat_columns = ['Month']
num_columns = list(range(300))

#define categorical transformer
cat_transformer = OneHotEncoder(sparse=False)

#define the log transformer
log_transformer = FunctionTransformer(np.log1p)

#define the column transformer
preprocessor = ColumnTransformer([
('categorical', cat_transformer, cat_columns),
("log", log_transformer, ["altitude"]),    
("scale_log", StandardScaler(), ["altitude"])
], remainder='passthrough')

In [30]:
#fit the train data and transform
X_tr = preprocessor.fit_transform(X_tr)

#transform the validation and test data
X_va = preprocessor.transform(X_va)
X_te = preprocessor.transform(X_te)

In [31]:
#turn labels to float
#define classes
classes = y_tr.unique()

#create an ordinal transformer indicating the 6 classes as categories
ordinal_transformer = OrdinalEncoder(categories=[classes])

#turn the labels to float
y_tr_float = ordinal_transformer.fit_transform(y_tr[:,np.newaxis]).reshape(y_tr.shape[0],)
y_va_float = ordinal_transformer.fit_transform(y_va[:,np.newaxis]).reshape(y_va.shape[0],)
y_te_float = ordinal_transformer.fit_transform(y_te[:,np.newaxis]).reshape(y_te.shape[0],)

In [32]:
#define number of features
n_feat = X_tr.shape[1]

#create neural network with one hidden layer
nn1 = Sequential()
#add hidden layer
nn1.add(Dense(20, activation="relu", input_dim=n_feat, kernel_initializer=initializers.VarianceScaling(scale=2.0, seed=0)))
#add output layer
nn1.add(Dense(6, activation="softmax", kernel_initializer=initializers.VarianceScaling(scale=1.0, seed=0)))
# Print network summary
nn1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 20)                6340      
_________________________________________________________________
dense_7 (Dense)              (None, 6)                 126       
Total params: 6,466
Trainable params: 6,466
Non-trainable params: 0
_________________________________________________________________


In [33]:
#compile model with a stochastic gradient descent as optimizer, crossentropy as loss function and accuracy as metrics
nn1.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [34]:
#fit the neural network
history_nn1 = nn1.fit(
x=X_tr, y=y_tr_float,
validation_data=(X_va, y_va_float), batch_size=32, epochs=50,
shuffle=True
)

Train on 204 samples, validate on 88 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [35]:
#compute test accuracy
(nn1_te_loss2_bal, nn1_te_accuracy2_bal) = nn1.evaluate(X_te, y_te_float, batch_size=32)
nn1_te_accuracy2_bal



0.9315068501315705

In [36]:
#summarise results
nn_results = pd.Series([nn1_te_accuracy1,nn1_te_accuracy2, nn1_te_accuracy1_bal, nn1_te_accuracy2_bal],["Neural network Pool 1","Neural network Pool 2", "Neural network Pool 1 balanced","Neural network Pool 2 balanced"])
nn_results

Neural network Pool 1             0.932773
Neural network Pool 2             0.922330
Neural network Pool 1 balanced    0.935829
Neural network Pool 2 balanced    0.931507
dtype: float64

In [37]:
#save results
nn_results.to_csv("data/results/nn.csv")