In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from scipy.stats import pearsonr
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform
from hyperparameteroptimization import tune_1st_NN, tune_2nd_NN

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import *
from tensorflow.keras import Input
from tensorflow.keras.utils import plot_model
from utils import * 
from buildmodel import prepare_10kmdata, prepare_40mdata, get_landcover_bands,z_score

In [2]:
#define file path and files
#file1: satellite data for randomly sampled points across U.S.
#file2: satellite export data associated with in situ points

path = '/Users/Mitchell/Documents/MLEnvironment/SoilMoistureDownscalingEAEE4000/'
file1 =  path + 'DataDownload/pointsamples/Point100Sample.csv'
# data file
file2 = path + 'DataDownload/InSitu/InSituGEEOutputs/SMGaugePoints2.csv'

### Step 1: Build first neural network ###
This neural network will be trained on ______ random points from contiguous USA. Data has been downloaded and reprojected to 10 km EPSG:4326 resolution in Google Earth Engine. Data for this section is from "file1".

Predictors: 
* All 13 bands plus NDVI from Sentinel-2
* VV, VH, and angle measurement from Sentinel-1
* mTPI ('elevation')
* landcover classification (categorical data transformed with One Hot Encoding)

Predictand:
* Surface Soil Moisture as measured by SMAP

In [3]:
# get the landcover bands that will be used in this study. This is needed for future one hot encoding.
lc_bands = list(get_landcover_bands([file1,file2]))
# define predictors and predictand in file
predictors = ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
       'B8A', 'B9', 'VH', 'VV', 'angle', 'elevation', 'NDVI']
predictand = ['ssm']

##### Import Data #####

Below data for this first neural network is imported, from the function prepare_10kmdata stored in the helper file buildmodel. This file imports, normalizes, and concatenates data time series from all random points.

We also import data for the second neural network at this point so we can evaluate how the first neural network (that predicts SMAP at 10km from remote sensing and land/soil/elevation data) correlates to in situ measurements when the test data at 40m is passed through  to predict.

In [4]:
# Bring in data for part I (10km resolution)
X_train, y_train, X_test, y_test, X_length = prepare_10kmdata(file1, predictors, predictand, lc_bands, dropna = True)

# Bring in data for part II (30m resolution) (in order to conduct preliminary evaluation of first neural net)
insitu_file = path+'DataDownload/InSitu/SoilMoistureDataFrameGreaterThan80.csv'
predictors1 = ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
       'B8A', 'B9', 'VH', 'VV', 'angle', 'elevation', 'NDVI']
predictors2 = ['ssm']
predictand = ['InSituSM']
X2_1_train, X2_2_train, y2_train, X2_1_test, X2_2_test, y2_test, X2_length = prepare_40mdata(
                            file2,insitu_file, predictors1, predictors2, predictand, lc_bands, dropna = True)

##### Build First Neural Network

Now we build the first neural network. We use the keras functional Model API as opposed to to the sequential API as demonstrated in class to add additional flexibility that will enable our unique model design

**Hyperparameter Tuning**
To tune hyperparameters, set tune_params to True. However this has already been run and the results have been implemented into the hyperparameters below.

In [5]:
tune_params = True
tune_1st_NN(max_evals = 5)

IndentationError: unexpected indent (<unknown>, line 1)

In [None]:
# def data():
#     path = '/Users/Mitchell/Documents/MLEnvironment/SoilMoistureDownscalingEAEE4000/'
#     file1 =  path + 'DataDownload/pointsamples/Point100Sample.csv'
#     file2 = path + 'DataDownload/InSitu/InSituGEEOutputs/SMGaugePoints2.csv'
#     lc_bands = list(get_landcover_bands([file1,file2]))
#     predictors = ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
#            'B8A', 'B9', 'VH', 'VV', 'angle', 'elevation', 'NDVI']
#     predictand = ['ssm']
#     X_t, y_t, _, _, _ = prepare_10kmdata(file1,
#                      predictors, predictand, lc_bands, dropna = True)
#     index = int(0.80*len(X_t))
#     X_train = X_t[:index] 
#     Y_train = y_t[:index]
#     X_test = X_t[index : ]
#     Y_test = y_t[index :]
#     return X_train, Y_train,X_test, Y_test 

# def model(X_train, Y_train,X_test, Y_test):
#     # --- Define layers with hyperparameter tuning built in -----

#     from tensorflow.keras.models import Model, load_model
#     from tensorflow.keras.layers import Dense, Dropout
#     from tensorflow.keras import Input

#     # Hyperparameter Tuning options
#     n_neuron_choice = {{choice([16, 32, 64, 128])}}
#     learning_rate_choice = {{choice([0.01,0.005, 0.001,0.0005, 0.0001,0.00001])}}
#     dropout_choice = {{uniform(0.,1.)}}
#     epoch_choice = {{choice([5,10,15,20,30,40,50])}}
#     layers_choice = {{choice(['two', 'three'])}}
#     activation     = 'relu'
#     minibatch_size = 64
#     # Model input
#     input1 = Input(shape= (X_train.shape[1],))
#     # Layer 1
#     dense1 = Dense(n_neuron_choice,  activation=activation)
#     # Layer 2
#     dense2 = Dense(n_neuron_choice,  activation=activation)
#     #Layer 3
#     dense3 = Dense(n_neuron_choice,  activation=activation)
#     # Dropout 
#     dropout_layer = Dropout(dropout_choice)
#     #Output
#     output_layer_1 = Dense(Y_train.shape[1],  activation='linear')
#     # --- Build Model ---
#     x = dense1(input1)
#     x = dense2(x)
#     x = dense3(x)
#     x = dropout_layer(x)
#     output1 = output_layer_1(x)
#     model = Model(inputs = input1, outputs = output1)
#     model.compile(loss='mse',
#                 optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate_choice))
#     early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)
#     model.fit(X_train, Y_train, 
#                         batch_size      = minibatch_size,
#                         epochs          = epoch_choice,
#                         validation_data=(X_test, Y_test),
#                         verbose         = 2,
#                         callbacks       = [early_stop])
#     loss = model.evaluate(X_test,  Y_test, verbose=0)
#     return {'loss': loss, 'status': STATUS_OK, 'model': model}



In [None]:
# best_run, best_model = optim.minimize(model=model,
#                                           data= data,
#                                           algo= tpe.suggest,
#                                           max_evals=100,
#                                           trials= Trials(),
#                                           notebook_name = 'FusedModel1',
#                                           eval_space = True)

In [None]:
# set hyperparameters based on tuning
n_neuron       = 64
activation     = 'relu'
num_epochs     = 15
learning_rate  = 0.0005
minibatch_size = 64
model_num      = 1
dropout_rate = 0
n_layers = 'three'

In [None]:
# --- Define layers -----
# Model input
input1 = Input(shape= (X_train.shape[1],))
# Layer 1
dense1 = Dense(n_neuron,  activation=activation)
# Layer 2
dense2 = Dense(n_neuron,  activation=activation)
#Layer 3
dense3 = Dense(n_neuron,  activation=activation)
## Dropout (have been playing around with this)
dropout_layer = Dropout(rate = dropout_rate)
#Output
output_layer_1 = Dense(y_train.shape[1],  activation='linear')

# --- Build Model ---
x = dense1(input1)
x = dense2(x)
if n_layers == 3:
    x = dense3(x)
x = dropout_layer(x)
output1 = output_layer_1(x)

model1 = Model(inputs = input1, outputs = output1)
model1.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
model1.summary()

##### Train Model
We train the model on 10km input data and ______ points to predict SMAP at 10 km

In [None]:
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)
history1 = model1.fit(X_train, y_train, 
            batch_size      = minibatch_size,
            epochs          = num_epochs,
            validation_split =0.2,
            verbose         = 2,
            callbacks       = [early_stop])



#### Evaluate training of first model

We first evaluate how this model does at actually predicting its predictand: SMAP surface soil moisture at 10km resolution, and plot the history of the model training.

In [None]:
loss = model1.evaluate(X_test, y_test)
plot_history(history1)
plt.title('First NN Training, Loss = {}'.format(loss))
plt.show()
plt.clf()




Now we evaluate if this can better predict in situ soil moisture at 40m when 40m satellite data is passed to the same trained model.

This rests under the assumptions that:
1. the relationships between soil mositure and our non-soil mositure satellite predictands holds up similarly at a high resolution vs the 10km resolution on which it was trained.
2. ____________

In [None]:
# See how this first testing does on its own for predicting in situ! We will plot this later when we compare results
# But since the model weights are changing with later training we get the accuracy values now

firstnn_y_test_pre = model1.predict(X2_1_test)[:,0]
in_situ_truth = y2_test[:,0]
firstnn_loss = metrics.mean_squared_error(in_situ_truth, firstnn_y_test_pre)
firstnn_pearson_r = pearsonr(in_situ_truth, firstnn_y_test_pre).statistic

Now we build the second neural network

In [None]:
# if tune_params:
#     def data():
#         path = '/Users/Mitchell/Documents/MLEnvironment/SoilMoistureDownscalingEAEE4000/'
#         file1 =  path + 'DataDownload/pointsamples/Point100Sample.csv'
#         file2 = path + 'DataDownload/InSitu/InSituGEEOutputs/SMGaugePoints2.csv'
#         insitu_file = path+'DataDownload/InSitu/SoilMoistureDataFrameGreaterThan80.csv'
#         lc_bands = list(get_landcover_bands([file1,file2]))
#         predictors = ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
#                'B8A', 'B9', 'VH', 'VV', 'angle', 'elevation', 'NDVI']
#         predictand = ['ssm']
#         X1_tr, X2_tr, y2_tr, _,_,_, _ = prepare_40mdata(
#                             file2,insitu_file, predictors1, predictors2, predictand, lc_bands, dropna = True)
#         index = int(0.80*len(y2_tr))
#         X1_train = X1_tr[:index] 
#         X2_train = X2_tr[:index] 
#         Y_train = y2_tr[:index]
#         X1_test = X1_tr[index:] 
#         X2_test = X2_tr[index:] 
#         Y_test = y2_tr[index:]
        
#         return X1_train,X2_train, Y_train, X1_test,X2_test, Y_test

#     def model(X1_train,X2_train, Y_train, X1_test,X2_test, Y_test):
#         # hyperparameters from last tuning:
#         # set hyperparameters based on tuning
#         n_neuron       = 64
#         activation     = 'relu'
#         num_epochs     = 15
#         learning_rate  = 0.0005
#         minibatch_size = 64
#         model_num      = 1
#         dropout_rate = 0
#         n_layers = 'three'
#         # Model input
#         input1 = Input(shape= (X_train.shape[1],))
#         dense1 = Dense(n_neuron,  activation=activation)
#         dense2 = Dense(n_neuron,  activation=activation)
#         dense3 = Dense(n_neuron,  activation=activation)
#         dropout_layer = Dropout(rate = dropout_rate)
#         output_layer_1 = Dense(y_train.shape[1],  activation='linear')
#         x = dense1(input1)
#         x = dense2(x)
#         if n_layers == 3:
#             x = dense3(x)
#         x = dropout_layer(x)
#         output1 = output_layer_1(x)

#         model1 = Model(inputs = input1, outputs = output1)
#         model1.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
#         early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)
#         model.fit(X_train, Y_train, 
#                         batch_size      = minibatch_size,
#                         epochs          = epoch_choice,
#                         validation_data=(X_test, Y_test),
#                         verbose         = 2,
#                         callbacks       = [early_stop])
#         # --- Define layers with hyperparameter tuning built in -----

#         from tensorflow.keras.models import Model, load_model
#         from tensorflow.keras.layers import Dense, Dropout
#         from tensorflow.keras import Input

#         # Hyperparameter Tuning options
#         n_neuron_choice = {{choice([16, 32, 64, 128])}}
#         learning_rate_choice = {{choice([0.01,0.005, 0.001,0.0005, 0.0001,0.00001])}}
#         dropout_choice = {{uniform(0.,1.)}}
#         epoch_choice = {{choice([5,10,15,20,30,40,50])}}
#         layers_choice = {{choice(['two', 'three'])}}
#         trainable =  = {{choice(['none','last', 'last2', 'last3'])}}
#         activation     = 'relu'
#         minibatch_size = 64


#         input2 = Input(shape= (1,))

#         input_concat = Concatenate()([output1, input2 ])
#         dense1_2 = Dense(n_neuron_choice,  activation=activation)
#         # Layer 2
#         dense2_2 = Dense(n_neuron_choice,  activation=activation)
#         # Layer 3
#         dense3_2 = Dense(n_neuron_choice,  activation=activation)
#         #output
#         output_layer_2 = Dense(y_train.shape[1],  activation='linear')
#         #dropout
#         dropout_layer = Dropout(rate = dropout_choice)
        
#         dense1.Trainable = False
#         dense2.Trainable = False
#         dense3.Trainable = False
#         if trainable == 'last':
#             dense3.Trainable = True
#         elif trainable == 'last2':
#             dense2.Trainable = True
#             dense3.Trainable = True
#         elif trainable == 'last3':
#             dense1.Trainable = True
#             dense2.Trainable = True
#             dense3.Trainable = True
            
            
        

#         x = dense1_2(input_concat)
#         x = dense2_2(x)
#         if layers_choice == 'three':
#             x = dense3_2(x)
#         x = dropout_layer(x)
#         output2 = output_layer_2(x)

#         model2 = Model(inputs = [input1, input2], outputs = output2)
        
#         model.compile(loss='mse',
#                     optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate_choice))

#         early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)
#         model.fit((X1_train,X2_train), Y_train, 
#                             batch_size      = minibatch_size,
#                             epochs          = epoch_choice,
#                             validation_data= (X_test, Y_test),
#                             verbose         = 2,
#                             callbacks       = [early_stop])


#         loss = model.evaluate((X1_test,X2_test),  Y_test, verbose=0)
#         print('loss: ' , loss)

#         return {'loss': loss, 'status': STATUS_OK, 'model': model}


#     # model(*data())
#     best_run, best_model = optim.minimize(model=model,
#                                               data=data,
#                                               algo=tpe.suggest,
#                                               max_evals=1000,
#                                               trials=Trials(),
#                                               notebook_name = 'FusedModel1',
#                                               eval_space=True)
#     print(best_run)

In [None]:
# set hyperparameters
n_neuron       = 64
activation     = 'relu'
num_epochs     = 50
learning_rate  = 0.0001
minibatch_size = 64

In [None]:
# Step 2, build 2nd neural net

input2 = Input(shape= (1,))

input_concat = Concatenate()([output1, input2 ])
dense1_2 = Dense(n_neuron,  activation=activation)
# Layer 2
dense2_2 = Dense(n_neuron,  activation=activation)
# Layer 3
dense3_2 = Dense(n_neuron,  activation=activation)
#output
output_layer_2 = Dense(y_train.shape[1],  activation='linear')
#dropout
# dropout_layer = Dropout(rate = 0.3)


x = dense1_2(input_concat)
x = dense2_2(x)
x = dense3_2(x)
x = dropout_layer(x)
output2 = output_layer_2(x)

model2 = Model(inputs = [input1, input2], outputs = output2)

In [None]:
dense1.Trainable = False
dense2.trainable = False
dense3.trainable = True

In [None]:
model2.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
model2.summary()

In [None]:
plot_model(model2)

In [None]:
# Train the model
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

history2 = model2.fit([X2_1_train, X2_2_train], y2_train, 
                    batch_size      = minibatch_size,
                    epochs          = num_epochs,
                    validation_split= 0.2, 
                    verbose         = 1,
                    callbacks       = [early_stop])


In [None]:
plot_history(history2)
plt.show()
plt.clf()

In [None]:
plt.scatter(in_situ_truth,firstnn_y_test_pre, s = 2)
plt.xlabel('Truth')
plt.ylabel('Predicted')
plt.xlim(-3,3)
plt.ylim(-3,3)
plt.title('First NN vs In Situ, MSE = {}, $R_p$ = {}'
          .format(round(firstnn_loss,3), round(firstnn_pearson_r,3)))
plt.show()
plt.clf()


In [None]:
y_smap = X2_2_test[:,0]
print(y_smap.shape)

smap_insitu_r = pearsonr(in_situ_truth, y_smap).statistic
smap_insitu_loss = metrics.mean_squared_error(in_situ_truth, y_smap)


plt.scatter(y_truth,y_smap, s = 2)
plt.xlabel('Truth')
plt.ylabel('Predicted')
plt.xlim(-3,3)
plt.ylim(-3,3)
plt.title('SMAP vs In Situ, MSE = {}, $R_p$ = {}'.format(smap_insitu_loss, smap_insitu_r))
plt.show()
plt.clf()

In [None]:

transfer_y_test_pre = model2.predict([X2_1_test, X2_2_test])[:,0]


transfer_insitu_r = pearsonr(in_situ_truth, transfer_y_test_pre).statistic
transfer_insitu_loss = metrics.mean_squared_error(in_situ_truth, transfer_y_test_pre)

plt.scatter(y_truth,y_test_pre, s = 2)
plt.xlabel('Truth')
plt.ylabel('Predicted')
plt.xlim(-3,3)
plt.ylim(-3,3)
plt.title('Transfer Model vs In Situ Testing, MSE = {}, $R_p$ = {}'.format(transfer_insitu_loss,transfer_insitu_r ))
plt.show()
plt.clf()



