## Transfer Learning from CNN LSTM


### Method 3 Manual pooling and appending

In [1]:
import imp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import sys
import tensorflow as tf
from tensorflow import keras

%matplotlib inline

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer

import os
tf.random.set_seed(42)


sys.path.insert(0, '../../src')
from utils import df_to_xarray,read_xarray,inverse_scale_image, get_point_prediction

sys.path.insert(0, '../../src/preprocess')
from data_preprocess import preprocess_image_reduced,preprocess_images

2022-03-22 09:06:55.787044: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


## Image Data and Model Loading

In [3]:
import tensorflow.keras.backend as kb
import tensorflow as tf
from tensorflow.keras import backend as K

def custom_rmse(y_true, y_pred):
    """
    custom_rmse(y_true, y_pred)
    calculates root square mean value with focusing only on the ocean
    """
    y_pred = y_pred[(y_true != 0) & (y_true != 0.0)]
    y_true = y_true[(y_true != 0) & (y_true != 0.0)]
    
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    return K.sqrt(K.mean(tf.math.squared_difference(y_pred, y_true),axis= -1))

In [4]:
sys.path.insert(0, '../../src/preprocess')
from data_preprocess import preprocess_image_reduced,preprocess_images


# Image component 
dir_name="../../data/data1"
val_dir_name="../../data/data2"

data,pco2 = preprocess_images(dir_name)
data_socat, pco2_socat = preprocess_images(dir_name, socat = True)
val_data,val_pco2 = preprocess_images(val_dir_name,"035")
val_data_socat, val_pco2_socat = preprocess_images(val_dir_name,"035", socat = True)


ecCodes library not found using ['eccodes', 'libeccodes.so', 'libeccodes']


In [5]:
INPUT_SHAPE=data_socat[0].shape

INPUT_SHAPE

(180, 360, 5)

In [6]:
base_model = tf.keras.models.load_model('../../models/base_model/base_model_new.h5', custom_objects={'custom_rmse':custom_rmse})

2022-03-22 09:08:07.163328: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-03-22 09:08:07.254709: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-03-22 09:08:07.438016: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:3b:00.0 name: Quadro RTX 8000 computeCapability: 7.5
coreClock: 1.62GHz coreCount: 72 deviceMemorySize: 44.49GiB deviceMemoryBandwidth: 581.23GiB/s
2022-03-22 09:08:07.438062: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2022-03-22 09:08:08.259405: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2022-03-22 09:08:08.259472: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11
2022-0

In [7]:
base_model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 180, 360, 64)      8064      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 180, 360, 64)      102464    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 60, 120, 64)       0         
_________________________________________________________________
dropout (Dropout)            (None, 60, 120, 64)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 60, 120, 128)      204928    
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 60, 120, 128)      409728    
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 180, 360, 128)     0

In [7]:
tf.keras.backend.clear_session()

https://www.tensorflow.org/tutorials/images/transfer_learning

## Taking multiple Inputs
https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/


In [None]:
# feature extraction

base_model.trainable = False
feature_batch = base_model(data_socat)
feature_batch= np.expand_dims(feature_batch, axis=3)

2022-03-22 09:08:24.593867: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2022-03-22 09:09:30.929846: W tensorflow/core/kernels/gpu_utils.cc:49] Failed to allocate memory for convolution redzone checking; skipping this check. This is benign and only means that we won't check cudnn for out-of-bounds reads and writes. This message will only be printed once.
2022-03-22 09:09:34.156805: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2022-03-22 09:09:40.080833: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11


In [115]:
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
prediction_layer = tf.keras.layers.Dense(256)
weight = prediction_layer(feature_batch_average)

prediction_layer2 = tf.keras.layers.Dense(64)
weight = prediction_layer2(weight)

prediction_layer3 = tf.keras.layers.Dense(16)
weight = prediction_layer3(weight)
weight.shape

TensorShape([421, 16])

In [116]:
image_weight=weight.numpy()

In [103]:
# Reading Data
dir_name="../../data/data1"
chl,mld,sss,sst,u10,fg_co2,xco2,icefrac,patm,pco2=read_xarray(dir_name)

# Creating one singular df
data_read=xr.merge([mld.MLD,mld.MLD_socat,sst.SST,sst.SST_socat,sss.SSS,sss.SSS_socat,xco2])
tmp_data=data_read.to_dataframe().reset_index()

tmp_data=tmp_data.drop(columns=['bnds','TLONG', 'TLAT', 'time_bnds'])

chl_data=chl.Chl.to_dataframe().reset_index()
chl_data_socat=chl.Chl_socat.to_dataframe().reset_index()
pco2_data=pco2.pCO2.to_dataframe().reset_index()
pco2_data_socat=pco2.pCO2_socat.to_dataframe().reset_index()

tmp_data["Chl_socat"]=chl_data_socat["Chl_socat"]
tmp_data["Chl"]=chl_data["Chl"]
tmp_data["pCO2_socat"]=pco2_data_socat["pCO2_socat"]
tmp_data["pCO2"]=pco2_data["pCO2"]

features_socat = ['time', 'xlon', 'ylat','MLD_socat', 'SST_socat', 'SSS_socat','Chl_socat', 'XCO2','pCO2_socat']
features = ['time', 'xlon', 'ylat','MLD','SST','SSS','Chl','XCO2','pCO2']

# create separate dataframe for socat
combined_socat=tmp_data.loc[:,features_socat]
combined=tmp_data.loc[:,features]

# drop rows where pco2 or pco2_socat == NA
combined_socat.dropna(subset = ["pCO2_socat"], inplace=True)
combined_socat= combined_socat[(combined_socat['pCO2_socat']!=0) & (combined_socat['SST_socat']!=0) ]
combined.dropna(subset = ["pCO2"], inplace=True)
combined= combined[(combined['pCO2']!=0) & (combined['SST']!=0)]

In [117]:
time_to_id = dict()

times=np.unique(combined_socat['time'])

for t in range(len(times)):
    time=times[t]
    time_to_id[time]=t

combined_socat['time']=combined_socat['time'].map(time_to_id)
combined['time']=combined['time'].map(time_to_id)



In [118]:

weight_df = pd.DataFrame(image_weight, columns =np.arange(image_weight.shape[1]))
weight_df=weight_df.loc[combined_socat["time"]]

combined_socat.reset_index(drop=True, inplace=True)
weight_df.reset_index(drop=True, inplace=True)

new_data_socat = pd.concat( [combined_socat, weight_df], axis=1)


weight_df = pd.DataFrame(image_weight, columns =np.arange(image_weight.shape[1]))
weight_df=weight_df.loc[combined["time"]]

combined.reset_index(drop=True, inplace=True)
weight_df.reset_index(drop=True, inplace=True)

new_data = pd.concat([combined, weight_df], axis=1)

In [119]:
last=len(new_data_socat.columns)-1

In [120]:

new_data_socat.insert(last, 'pCO2_socat', new_data_socat.pop('pCO2_socat'))

new_data.insert(last, 'pCO2', new_data.pop('pCO2'))

new_data_socat.head()

Unnamed: 0,time,xlon,ylat,MLD_socat,SST_socat,SSS_socat,Chl_socat,XCO2,0,1,...,7,8,9,10,11,12,13,14,15,pCO2_socat
0,270,0.5,-51.5,99.849854,2.587441,33.649975,0.142726,377.884216,6.3268,-0.523958,...,-0.428963,17.076962,0.672102,3.285192,4.768794,4.299695,11.357102,4.436784,-5.020576,318.103145
1,73,0.5,25.5,,,,0.12899,350.947815,4.594323,-0.380479,...,-0.311503,12.400743,0.48806,2.385596,3.462949,3.122298,8.247158,3.221848,-3.645777,311.86518
2,73,0.5,25.5,,,,0.127381,350.947815,4.594323,-0.380479,...,-0.311503,12.400743,0.48806,2.385596,3.462949,3.122298,8.247158,3.221848,-3.645777,311.544856
3,253,0.5,25.5,,,,0.132072,375.006958,6.115148,-0.506426,...,-0.414617,16.50569,0.649619,3.175293,4.609261,4.155852,10.977163,4.288356,-4.852616,315.684104
4,11,0.5,26.5,,,,0.135994,342.133972,4.745259,-0.392978,...,-0.321737,12.808158,0.504098,2.46398,3.576717,3.22488,8.518113,3.327701,-3.765551,317.861697


In [121]:

X_socat=new_data_socat.iloc[:,3:-1]
X=new_data.iloc[:,3:-1]
y=new_data.loc[:,'pCO2']
y_socat=new_data_socat.loc[:,'pCO2_socat']
X_socat.head()

Unnamed: 0,MLD_socat,SST_socat,SSS_socat,Chl_socat,XCO2,0,1,2,3,4,...,6,7,8,9,10,11,12,13,14,15
0,99.849854,2.587441,33.649975,0.142726,377.884216,6.3268,-0.523958,-10.646767,-6.930975,10.113083,...,21.753256,-0.428963,17.076962,0.672102,3.285192,4.768794,4.299695,11.357102,4.436784,-5.020576
1,,,,0.12899,350.947815,4.594323,-0.380479,-7.731336,-5.033049,7.343796,...,15.796515,-0.311503,12.400743,0.48806,2.385596,3.462949,3.122298,8.247158,3.221848,-3.645777
2,,,,0.127381,350.947815,4.594323,-0.380479,-7.731336,-5.033049,7.343796,...,15.796515,-0.311503,12.400743,0.48806,2.385596,3.462949,3.122298,8.247158,3.221848,-3.645777
3,,,,0.132072,375.006958,6.115148,-0.506426,-10.290595,-6.699115,9.774763,...,21.025539,-0.414617,16.50569,0.649619,3.175293,4.609261,4.155852,10.977163,4.288356,-4.852616
4,,,,0.135994,342.133972,4.745259,-0.392978,-7.985343,-5.198405,7.585066,...,16.315487,-0.321737,12.808158,0.504098,2.46398,3.576717,3.22488,8.518113,3.327701,-3.765551


In [122]:
num_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy="mean")),
        ('std_scaler', StandardScaler()),
    ])

X_socat=num_pipeline.fit_transform(X_socat)
X=num_pipeline.fit_transform(X)

In [123]:
def FFN_Model():
    model = tf.keras.models.Sequential([
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(1024, activation="elu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(512, activation="elu"),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(256, activation="elu"),
    tf.keras.layers.Dense(64, activation="elu"),
    tf.keras.layers.Dense(1,activation="linear")])

    model.compile(optimizer='adam', loss='mean_squared_error')
    
    return model

In [124]:
X_socat_train, X_socat_test, y_socat_train, y_socat_test = train_test_split(
    X_socat, y_socat, test_size=0.2, random_state= 73)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state= 73)

In [125]:
model_path = '../../models/Simple_NN_Full.h5'

early_stopings = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='min')
checkpoint =  tf.keras.callbacks.ModelCheckpoint(model_path, monitor='val_loss', save_best_only=True, mode='min', verbose=0)
callbacks=[early_stopings,checkpoint] 

In [None]:
n_epochs = 100

model = FFN_Model()
history = model.fit(X_socat_train, y_socat_train, epochs=n_epochs, 
                    batch_size=2056, validation_data=(X_socat_test, y_socat_test),callbacks=callbacks)

Epoch 1/100
Epoch 2/100

In [None]:
tf.keras.backend.clear_session()