# Modifications

Link to Interactive Notebook: 
https://colab.research.google.com/drive/1sbJTsgCsAQwCkGdLXK7EdgkaTpiTGBM1#scrollTo=T71qxHyh9p23

1. Reduced Input Dimension
2. Different handling of xco2
3. Custom rmse function

What has helped the most?
- ELU
- ADAM
- MODEL Architecture

batch size?

In [None]:
import imp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import sys
import tensorflow as tf
from tensorflow import keras
import os
from sklearn.model_selection import train_test_split
tf.random.set_seed(42)


sys.path.insert(0, '../../src')

from utils import df_to_xarray,read_xarray,inverse_scale_image, get_point_prediction

In [None]:
# Data locations, relative to this notebook.
# `dir_name` is the set that is preprocessed and trained on below;
# `val_dir_name` is presumably a held-out set (it is not referenced again in
# the cells visible here -- TODO confirm).
dir_name = "../../data/data1"
val_dir_name = "../../data/data2"

## Data Preprocessing

In [None]:
# Make the preprocessing module importable; this has to run before the
# import on the next statement.
sys.path.insert(0, '../../src/preprocess')

from data_preprocess import preprocess_images

# `X` is used as the model input and `pco2_images` as the training target in
# the fit cells further down.
X, pco2_images = preprocess_images(dir_name)

In [None]:
# Per-sample shapes (leading sample axis dropped). They size the first layer
# and the final Reshape of the models below.
INPUT_SHAPE = X[0].shape
OUTPUT_SHAPE = pco2_images[0].shape

INPUT_SHAPE

In [None]:
# Show the shape of the full target array (sample axis first, then OUTPUT_SHAPE).
pco2_images.shape

In [None]:
# Quick visual sanity check of one target image (sample index 1).
plt.imshow(pco2_images[1], cmap="RdBu", interpolation="nearest")

## Modeling

### Image Segmentation
CNN - Unet

Reference: 
https://towardsdatascience.com/understanding-semantic-segmentation-with-unet-6be4f42d4b47


In [None]:
sys.path.insert(0, '../../src')
from utils import df_to_xarray,read_xarray, custom_rmse
from functools import partial

# Shared convolution factory: 5x5 kernels, ELU activation and "SAME" padding
# unless a layer overrides them (the final 1x1 convolutions below do).
DefaultConv2D = partial(
    keras.layers.Conv2D,
    kernel_size=5,
    activation='elu',
    padding="SAME",
)




### Base Model

In [None]:
# Reset Keras' global state before building a new model, so that re-running
# the cells below starts from a clean slate.
tf.keras.backend.clear_session()

In [None]:


# Sequential encoder / bottleneck / decoder stack. Layers are created in the
# same order as they are applied.

# Encoder: two 64-filter convolutions, then down-sampling and dropout.
base_encoder_layers = [
    DefaultConv2D(filters=64, input_shape=INPUT_SHAPE),
    DefaultConv2D(filters=64),
    # Author's note: pool size 3 worked better than pool size 2.
    keras.layers.MaxPooling2D(pool_size=3),
    # Dropout sits right before the deepest (128-filter) stage.
    keras.layers.Dropout(0.3),
]

# Bottleneck: the deepest stage, at the pooled resolution.
base_bottleneck_layers = [
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
]

# Decoder: up-sample by the pooling factor, narrow down to a single channel
# and reshape to the target image shape.
base_decoder_layers = [
    keras.layers.UpSampling2D(size=3),
    DefaultConv2D(filters=64),
    DefaultConv2D(filters=2),
    DefaultConv2D(filters=1, kernel_size=1),
    keras.layers.Reshape(OUTPUT_SHAPE),
]

base_model = keras.models.Sequential(
    base_encoder_layers + base_bottleneck_layers + base_decoder_layers
)


## BEST SO FAR ##

base_model.summary()

In [None]:
# Adam with an explicit learning rate, optimising the project's custom RMSE loss.
myLearnRate = 0.001
custom_opt = tf.keras.optimizers.Adam(learning_rate=myLearnRate)
base_model.compile(optimizer=custom_opt, loss=custom_rmse)

In [None]:
# Train the base model, keeping only the best checkpoint on disk.
model_path = "../models/base_model/base_model_new.h5"

# Stop once `val_loss` has not improved for 5 consecutive epochs, and write a
# checkpoint only when `val_loss` improves.
# NOTE(review): `validation_data` below is the training set itself, so
# `val_loss` is not an independent estimate and early stopping / checkpointing
# cannot detect over-fitting. `val_dir_name` is defined earlier in the
# notebook but never used -- consider validating on data loaded from it.
early_stopings = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='min'
)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    model_path, monitor='val_loss', save_best_only=True, mode='min', verbose=0
)
callbacks = [early_stopings, checkpoint]

#batch size 24, 32, 64 not as good
# batch size 16 the best
# `workers` is intentionally not passed: it only applies to generator /
# `keras.utils.Sequence` inputs (no effect for in-memory arrays) and newer
# Keras releases reject it as an unknown argument.
history = base_model.fit(
    X,
    pco2_images,
    epochs=100,
    batch_size=16,
    validation_data=(X, pco2_images),
    callbacks=callbacks,
)



### Base Model Result

In [None]:

# Reload the checkpoint written during training. The custom loss has to be
# passed in by name so the saved model can be deserialised. Then predict on
# the same inputs the model was trained on.
best_model = tf.keras.models.load_model(
    '../models/base_model/base_model_new.h5',
    custom_objects={'custom_rmse': custom_rmse},
)
predicted_image = best_model.predict(X, verbose=1)

In [None]:
# Zero the prediction wherever the target is exactly 0, in place.
# Presumably those are land / missing-data pixels -- TODO confirm against
# preprocess_images.
predicted_image[pco2_images==0]=0.0

In [None]:
# 2x2 figure for sample 0: prediction (top-left), target (top-right) and
# residual (bottom-left). The bottom-right axes are left empty.
figure, axis = plt.subplots(2, 2, figsize=(12, 6))

# Images are flipped vertically before display.
img = axis[0][0].imshow(
    np.flipud(predicted_image[0]), cmap="coolwarm", interpolation="nearest"
)
axis[0][0].set_title("prediction")
plt.colorbar(img, ax=axis)

img1 = axis[0][1].imshow(
    np.flipud(pco2_images[0]), cmap="coolwarm", interpolation="nearest"
)
axis[0][1].set_title("true")

# Residual = prediction - target.
diff = np.flipud(np.squeeze(predicted_image[0] - pco2_images[0]))
img2 = axis[1][0].imshow(diff, cmap="RdBu", interpolation="nearest")
axis[1][0].set_title("residual")
plt.colorbar(img2, ax=axis)

plt.savefig('../assets/cnn-unet.png')

plt.show()





In [None]:

import imageio
import matplotlib.colors as mcolors

# Residuals over the whole set, and a diverging colour norm centred on zero
# that spans their full range. The animation cell below passes `norm` to the
# residual panel of every frame.
d = predicted_image - pco2_images

norm = mcolors.TwoSlopeNorm(vcenter=0, vmin=d.min(), vmax=d.max())

In [None]:
import tempfile

# Render one PNG frame per sample (prediction, target, residual) and stitch
# the frames into an animated GIF.
filenames = []

# Frames are written to a temporary directory so the working directory stays
# clean and the PNGs are removed even if plotting or encoding raises.
with tempfile.TemporaryDirectory() as frame_dir:
    # One frame per predicted sample, rather than a hard-coded sample count.
    for i in range(len(predicted_image)):
        # 2x2 panel: prediction, target, residual (bottom-right stays empty)
        figure, axis = plt.subplots(2, 2, figsize=(12, 6))

        img = axis[0][0].imshow(np.flipud(predicted_image[i]), cmap="coolwarm", interpolation="nearest")
        axis[0][0].set_title("prediction")
        plt.colorbar(img, ax=axis)

        img1 = axis[0][1].imshow(np.flipud(pco2_images[i]), cmap="coolwarm", interpolation="nearest")
        axis[0][1].set_title("true")

        # `norm` keeps the residual colour scale identical across frames.
        diff = np.flipud(np.squeeze(predicted_image[i] - pco2_images[i]))
        img2 = axis[1][0].imshow(diff, cmap="RdBu", interpolation="nearest", norm=norm)
        axis[1][0].set_title("residual")
        plt.colorbar(img2, ax=axis)

        # create file name and append it to a list
        filename = os.path.join(frame_dir, f'{i}.png')
        filenames.append(filename)

        # save frame, then release the figure so memory use stays flat
        plt.savefig(filename)
        plt.close()

    # build gif (must happen while the temporary frames still exist)
    with imageio.get_writer('../assets/cnn-unet.gif', mode='I') as writer:
        for filename in filenames:
            image = imageio.imread(filename)
            writer.append_data(image)

In [None]:
# Per-sample RMSE: each target image is compared with the prediction for the
# SAME sample. Indexing only the target would broadcast it against the whole
# prediction stack and mix every sample into each value.
rmses = [
    np.sqrt(np.mean((true_img - pred_img) ** 2))
    for true_img, pred_img in zip(pco2_images, predicted_image)
]

plt.plot(rmses)
plt.savefig('../assets/unet-overtime.png')

## Model 1

- A more complex model with a greater number of parameters
- Needs a different learning rate

In [None]:

# Author's note: kernel_size matters, 2 does not work.

# Same layout as the base model with one extra pooling / up-sampling level.
# Layers are created in the order they are applied.

# Encoder: two down-sampling stages (32 then 64 filters), each followed by
# 3x3 max-pooling and dropout.
model1_encoder_layers = [
    DefaultConv2D(filters=32, input_shape=INPUT_SHAPE),
    DefaultConv2D(filters=32),
    keras.layers.MaxPooling2D(pool_size=3),
    keras.layers.Dropout(0.35),

    DefaultConv2D(filters=64),
    DefaultConv2D(filters=64),
    keras.layers.MaxPooling2D(pool_size=3),
    keras.layers.Dropout(0.35),
]

# Bottleneck: the deepest stage.
model1_bottleneck_layers = [
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
]

# Decoder: two up-sampling stages mirroring the encoder, then narrow down to
# a single channel and reshape to the target image shape.
model1_decoder_layers = [
    keras.layers.UpSampling2D(size=3),
    DefaultConv2D(filters=64),
    DefaultConv2D(filters=64),

    keras.layers.UpSampling2D(size=3),
    DefaultConv2D(filters=32),
    DefaultConv2D(filters=2),
    DefaultConv2D(filters=1, kernel_size=1),
    keras.layers.Reshape(OUTPUT_SHAPE),
]

model1 = keras.models.Sequential(
    model1_encoder_layers + model1_bottleneck_layers + model1_decoder_layers
)

model1.summary()


In [None]:
# Adam with an explicit learning rate; the custom RMSE is the loss and plain
# MSE is tracked as an additional metric.
myLearnRate = 0.001
custom_opt = tf.keras.optimizers.Adam(learning_rate=myLearnRate)
model1.compile(
    optimizer=custom_opt,
    loss=custom_rmse,
    metrics=["mean_squared_error"],
)

In [None]:
# Train model1, keeping only the best checkpoint on disk.
model_path = "../models/base_model/reduceddim_model1.h5"

# Stop once `val_loss` has not improved for 5 consecutive epochs, and write a
# checkpoint only when `val_loss` improves.
# NOTE(review): `validation_data` below is the training set itself, so
# `val_loss` is not an independent estimate and early stopping / checkpointing
# cannot detect over-fitting. `val_dir_name` is defined earlier in the
# notebook but never used -- consider validating on data loaded from it.
early_stopings = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='min'
)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    model_path, monitor='val_loss', save_best_only=True, mode='min', verbose=0
)
callbacks = [early_stopings, checkpoint]

# `workers` is intentionally not passed: it only applies to generator /
# `keras.utils.Sequence` inputs (no effect for in-memory arrays) and newer
# Keras releases reject it as an unknown argument.
history = model1.fit(
    X,
    pco2_images,
    epochs=100,
    batch_size=16,
    validation_data=(X, pco2_images),
    callbacks=callbacks,
)




In [None]:

# Reload the best model1 checkpoint (the custom loss must be supplied by name
# for deserialisation) and predict on the training inputs.
best_model = tf.keras.models.load_model(
    '../models/base_model/reduceddim_model1.h5',
    custom_objects={'custom_rmse': custom_rmse},
)
predicted_image = best_model.predict(X, verbose=1)

# Zero the prediction wherever the target is exactly 0, exactly as is done for
# the base model's predictions, so residuals and RMSE values of the two models
# are computed on the same footing.
predicted_image[pco2_images == 0] = 0.0

### Model 1 Result

In [None]:
import matplotlib.pyplot as plt

# Prediction and target must be shown for the SAME sample. `predicted_image`
# covers every sample in X, so both arrays are indexed with one shared index;
# 420 is the sample the "true" panel has always displayed
# (`pco2_images[419:421][1]`).
sample_idx = 420

figure, axis = plt.subplots(1, 2)

# Images are squeezed to 2-D and flipped vertically before display.
img = axis[0].imshow(
    np.flipud(np.squeeze(predicted_image[sample_idx])),
    cmap="RdBu",
    interpolation="nearest",
)
axis[0].set_title("prediction")
plt.colorbar(img, ax=axis)

img1 = axis[1].imshow(
    np.flipud(np.squeeze(pco2_images[sample_idx])),
    cmap="RdBu",
    interpolation="nearest",
)
axis[1].set_title("true")
plt.show()



In [None]:
# Residual (target - prediction) for a single sample. Both arrays are indexed
# with the same sample index; 420 is the target sample this cell has always
# used (`pco2_images[419:421][1]`), and `predicted_image` covers every sample.
sample_idx = 420

diff = np.flipud(np.squeeze(pco2_images[sample_idx] - predicted_image[sample_idx]))
plt.imshow(diff, cmap="RdBu", interpolation="nearest")
plt.colorbar()
plt.title("Residual Plot")

plt.show()

In [None]:
# Over time
# Per-sample RMSE: each target image is compared with the prediction for the
# SAME sample. Indexing only the target would broadcast it against the whole
# prediction stack and mix every sample into each value.
rmses = [
    np.sqrt(np.mean((true_img - pred_img) ** 2))
    for true_img, pred_img in zip(pco2_images, predicted_image)
]

In [None]:
# Plot the RMSE series computed in the previous cell and save the figure.
plt.plot(rmses)
plt.savefig('../assets/overtime.png')



## Getting PCO2 Prediction per Point

In [None]:
def inverse_scale_image(arr, df):
    """
    Map scaled pCO2 images back onto the value range of the reference data.

    ``arr`` is treated as lying on a 0-255 scale and is stretched linearly
    onto ``[nanmin(df), nanmax(df)]``.

    Note: this definition rebinds the ``inverse_scale_image`` name imported
    from ``utils`` at the top of the notebook.

    Parameters
    ----------
    arr : numpy.ndarray
        Scaled values (e.g. model predictions). Not modified.
    df : numpy.ndarray
        Reference pCO2 values with the same shape as ``arr``; may hold NaNs.

    Returns
    -------
    tuple of numpy.ndarray
        ``(y_true, y_pred)`` where ``y_true`` is ``np.nan_to_num(df)`` and
        ``y_pred`` is the rescaled ``arr`` with every cell set to 0 where
        ``y_true`` is 0.
    """
    lower = np.nanmin(df)
    upper = np.nanmax(df)

    # NaNs in the reference become 0 and double as the mask below.
    reference = np.nan_to_num(df)

    # Undo the 0-255 scaling: stretch by the value range, then shift.
    rescaled = arr * (upper - lower) / 255 + lower
    rescaled[reference == 0] = 0

    return reference, rescaled


In [None]:
# Re-read the datasets from the training directory. Of the ten returned
# objects only `pco2` is used in this cell: its `pCO2` values supply the
# range for un-scaling the predictions.
(
    chl,
    mld,
    sss,
    sst,
    u10,
    fg_co2,
    xco2,
    icefrac,
    patm,
    pco2,
) = read_xarray(dir_name)

y_true, y_pred = inverse_scale_image(predicted_image, pco2.pCO2.data)

In [None]:
# RMSE between the reference values and the un-scaled predictions.
# NOTE(review): the mean runs over every cell, including those that
# inverse_scale_image set to 0 in both arrays; they contribute zero error and
# pull the score down -- confirm that averaging over them is intended.
print("Scaled back RMSE score:")
np.sqrt(np.mean((y_true-y_pred)**2))