## Transfer Learning from CNN 


### Method 1
- Data preprocess the same way for input
- try the same thing as tf_layer2 but with unet 

### Bench Marks
**Final Result for Random Forest trained on SOCAT**

Test Set RMSE: 30.56 | Whole Grid Rmse: 42.12


**Final Result for XGBoost trained on SOCAT**

Test Set RMSE:28.43698261274142 | Whole Grid Rmse:37.709863752151215

### Result

Test Set RMSE: 15.368 | Whole Grid RMSE: ~87


In [24]:
tf.keras.backend.clear_session()

In [1]:
import imp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import sys
import tensorflow as tf
from tensorflow import keras

%matplotlib inline

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer

import os
tf.random.set_seed(42)


sys.path.insert(0, '../../src')
from utils import df_to_xarray,read_xarray,inverse_scale_image, get_point_prediction

sys.path.insert(0, '../../src/preprocess')
from data_preprocess import preprocess_image_reduced,preprocess_images

2022-02-22 11:34:22.212967: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [2]:
import tensorflow.keras.backend as kb
import tensorflow as tf
from tensorflow.keras import backend as K

def custom_rmse(y_true, y_pred):
    """
    custom_rmse(y_true, y_pred)
    calculates root square mean value with focusing only on the ocean
    """
    y_pred = y_pred[y_true != 0]
    y_true = y_true[y_true != 0]
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    
    return K.sqrt(K.mean(tf.math.squared_difference(y_pred, y_true), axis=-1))

In [3]:
dir_name="../../data/data1"
val_dir_name="../../data/data2"

data,pco2 = preprocess_images(dir_name)
data_socat, pco2_socat = preprocess_images(dir_name, socat = True)

val_data,val_pco2 = preprocess_images(val_dir_name,"035")
val_data_socat,val_pco2_socat = preprocess_images(val_dir_name,"035",socat=True)


ecCodes library not found using ['eccodes', 'libeccodes.so', 'libeccodes']


In [4]:
INPUT_SHAPE=data_socat[0].shape
OUTPUT_SHAPE=pco2_socat[0].shape

INPUT_SHAPE

(180, 360, 5)

In [27]:
from tensorflow.keras.layers import Dense, Reshape, Conv2D, MaxPool2D , Flatten, Input

base_model = tf.keras.models.load_model('../../models/base_model/base_model_new.h5', custom_objects={'custom_rmse':custom_rmse})


In [28]:
for index, layer in enumerate(base_model.layers):
    print(index, layer)

0 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x1554a433d640>
1 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x1554a4337c10>
2 <tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x1554a4337850>
3 <tensorflow.python.keras.layers.core.Dropout object at 0x1554c033a430>
4 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x1554a433d7c0>
5 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x1554a433d6d0>
6 <tensorflow.python.keras.layers.convolutional.UpSampling2D object at 0x1554a433d1c0>
7 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x1554a4331550>
8 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x1554c047e760>
9 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x1554a4342fd0>
10 <tensorflow.python.keras.layers.core.Reshape object at 0x1554a43598b0>


In [29]:
for layer in base_model.layers[:-7]:
    layer.trainable = False

base_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 180, 360, 64)      8064      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 180, 360, 64)      102464    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 60, 120, 64)       0         
_________________________________________________________________
dropout (Dropout)            (None, 60, 120, 64)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 60, 120, 128)      204928    
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 60, 120, 128)      409728    
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 180, 360, 128)     0

In [30]:
base_model.compile(
    loss=custom_rmse, optimizer=keras.optimizers.Adam(learning_rate=0.03),
)

model_path="../../models/transfer_CNN.h5"

early_stopings = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=4, verbose=1, mode='min')
checkpoint =  tf.keras.callbacks.ModelCheckpoint(model_path, monitor='val_loss', save_best_only=True, mode='min', verbose=0)
callbacks=[early_stopings,checkpoint]

epochs = 30
batch_size = 16

In [31]:
base_model.fit(
    data_socat,
    pco2_socat,
    batch_size=batch_size,
    epochs=epochs,
    workers=-1,
    validation_data=(data_socat, pco2_socat),
    callbacks=callbacks,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 00007: early stopping


<tensorflow.python.keras.callbacks.History at 0x1554a43daa30>

### Assessing Performance

In [None]:
best_model = tf.keras.models.load_model('../../models/transfer_CNN.h5', custom_objects={'custom_rmse':custom_rmse})

predicted_frames=best_model.predict(data,verbose=1)

In [None]:
predicted_frames[y==0]=0.0

In [None]:
figure, axis = plt.subplots(2, 2,figsize=(12, 6))


img=axis[0][0].imshow(np.flipud(predicted_frames[0][1]),cmap="coolwarm", interpolation="nearest")
axis[0][0].set_title("prediction")
plt.colorbar(img,ax=axis)

img1=axis[0][1].imshow(np.flipud(y[0][1]),cmap="coolwarm", interpolation="nearest")
axis[0][1].set_title("true")

diff=np.flipud(np.squeeze(predicted_frames[0][1]-y[0][1]))
img2=axis[1][0].imshow(diff,cmap="RdBu", interpolation="nearest")
axis[1][0].set_title("residual")
plt.colorbar(img2,ax=axis)


img2=axis[1][1].imshow(np.flipud(X[0][1][:,:,5]),cmap="coolwarm", interpolation="nearest")
axis[1][1].set_title("input: previous pco2")

plt.savefig('../../assets/transfer_nfp.png')

plt.show()

### Inverse

In [None]:
def inverse_scale_image_nfp(arr, df):
    """
    inverse_scale_image(arr, df):
    - inverses the pco2 scaling
    """
    
    old_min = np.nanmin(df)
    old_max = np.nanmax(df)
    y_pred = arr*(old_max-old_min)/255+old_min
    
    tmp=np.nan_to_num(df[X_index][1:])
    y_true=np.expand_dims(tmp,axis=4)
    y_pred[y_true==0]=0
    return y_true,y_pred

In [None]:
chl,mld,sss,sst,u10,fg_co2,xco2,icefrac,patm,pco2 = read_xarray(dir_name)
y_true,y_pred=inverse_scale_image_nfp(predicted_frames,pco2.pCO2.data)  

In [None]:
print("Scaled back whole grid RMSE score:")
np.sqrt(np.mean((y_true[:,:1]-y_pred[:,:1])**2))