In [1]:
pip install pandas xarray numpy dask[array] scikit-learn tensorflow matplotlib

Collecting tensorflow
  Downloading tensorflow-2.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (524.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m524.1/524.1 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting tensorflow-estimator<2.14,>=2.13.0
  Downloading tensorflow_estimator-2.13.0-py2.py3-none-any.whl (440 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m440.8/440.8 kB[0m [31m69.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-io-gcs-filesystem>=0.23.1
  Downloading tensorflow_io_gcs_filesystem-0.33.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m108.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting astunparse>=1.6.0
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting gast<=0.4.0,>=0.2.1
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting absl-py>=1.0.0
  Downl

In [2]:
import pandas as pd
from pathlib import Path
import xarray as xr
import numpy as np
import calendar
import os.path

import dask.array as da
from dask.delayed import delayed
from sklearn.model_selection import train_test_split
import gc

from tensorflow.keras import layers, regularizers, optimizers

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.layers import Input, Dropout, Dense, Add, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

2023-08-10 15:35:57.001931: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-10 15:35:57.045225: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-10 15:35:57.046377: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Open the SST file and truncate every timestamp to the start of its day.
ds = xr.open_dataset('/home/jovyan/sst.nc')
ds['time'] = ds['time'].dt.floor('D')

# Keep only the lat 35..-5 / lon 45..90 window.
# NOTE(review): slice(35, -5) presumably relies on `lat` being stored in
# descending order — confirm against the file.
zarr_new = ds.sel(lat=slice(35, -5), lon=slice(45, 90))

# True for each time step whose entire spatial field is NaN.
all_nan_dates = np.isnan(zarr_new["sst"]).all(dim=["lon", "lat"]).compute()

# Drop the all-NaN time steps, then order what is left by time.
zarr_ds = zarr_new.sel(time=~all_nan_dates).sortby('time')
# zarr_ds = zarr_ds.sel(time=slice('2020-09-01', '2020-09-30'))
zarr_ds

In [4]:
import dask.array as da
from dask.delayed import delayed
from sklearn.model_selection import train_test_split
import gc

def preprocess_day_data(day_data):
    """Subtract the NaN-ignoring mean from ``day_data``.

    Size-1 axes are squeezed out first, so the result can have fewer
    dimensions than the input. The mean is a single scalar taken over every
    element that was passed in and is evaluated eagerly with ``.compute()``.

    NOTE(review): despite the name, nothing here limits the mean to one day;
    it covers whatever block the caller hands over — confirm this is intended.

    Args:
        day_data: Array-like accepted by ``da.squeeze`` and ``da.nanmean``.

    Returns:
        The squeezed data minus its scalar NaN-ignoring mean.
    """
    squeezed = da.squeeze(day_data)
    # Materialise the mean right away so a plain scalar is subtracted.
    centre = da.nanmean(squeezed).compute()
    return squeezed - centre

def preprocess_data(zarr_ds, chunk_size=200):
    """Mean-centre the ``sst`` variable slice by slice and zero-fill NaNs.

    The leading axis of ``zarr_ds['sst']`` is cut into consecutive slices of
    at most ``chunk_size`` entries. Each slice is passed through
    ``preprocess_day_data`` via ``map_blocks``, its NaNs are replaced with
    0.0, and the slices are concatenated back along axis 0.

    Args:
        zarr_ds: Dataset-like object exposing an ``sst`` variable whose
            slices provide ``map_blocks``.
        chunk_size: Maximum number of leading-axis entries per slice; must be
            at least 1.

    Returns:
        A dask array holding the concatenated, processed slices.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    sst = zarr_ds['sst']  # looked up once instead of on every iteration
    total_len = sst.shape[0]
    chunks = []

    for start_idx in range(0, total_len, chunk_size):
        end_idx = min(start_idx + chunk_size, total_len)
        chunk = sst[start_idx:end_idx]

        processed_chunk = chunk.map_blocks(preprocess_day_data)

        # Use da.where to replace NaNs with 0.0
        processed_chunk = da.where(da.isnan(processed_chunk), 0.0, processed_chunk)

        # preprocess_day_data squeezes size-1 axes, so a slice holding a single
        # entry (e.g. the remainder when total_len % chunk_size == 1) can lose
        # its leading axis; put it back so every piece has the same rank
        # before concatenation.
        if chunk.shape[0] == 1 and processed_chunk.ndim == chunk.ndim - 1:
            processed_chunk = processed_chunk[None, ...]

        chunks.append(processed_chunk)

    return da.concatenate(chunks, axis=0)

processed_data = preprocess_data(zarr_ds).compute()

def prepare_data_from_processed(processed_data, window_size=5):
    """Build sliding-window samples along the first axis.

    Sample ``i`` pairs the ``window_size`` consecutive entries starting at
    index ``i`` with the single entry that immediately follows them.

    Args:
        processed_data: Object exposing ``shape`` and indexable/sliceable
            along its first axis.
        window_size: Number of consecutive entries in each input window.

    Returns:
        ``(X, y)`` as produced by ``da.array``: the stacked windows and the
        matching next-step targets. Both are empty when the first axis has no
        more than ``window_size`` entries.
    """
    sample_count = processed_data.shape[0] - window_size
    starts = range(sample_count)

    windows = [processed_data[s:s + window_size] for s in starts]
    targets = [processed_data[s + window_size] for s in starts]

    return da.array(windows), da.array(targets)

# Turn the processed series into (window, next-step) samples using the
# default window_size of 5.
X, y = prepare_data_from_processed(processed_data)

In [5]:
def time_series_split(X, y, train_ratio=0.7, val_ratio=0.2):
    """Split ``X`` and ``y`` into consecutive train, validation and test parts.

    Nothing is shuffled: the first ``train_ratio`` share of the samples is
    the training part, the following ``val_ratio`` share is the validation
    part, and whatever remains is the test part.

    Args:
        X: Inputs, exposing ``shape`` and sliceable along the first axis.
        y: Targets with the same number of samples as ``X``.
        train_ratio: Fraction of samples used for training.
        val_ratio: Fraction of samples used for validation.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, if a ratio is
            negative, or if the two ratios add up to more than 1.
    """
    total_length = X.shape[0]

    if len(y) != total_length:
        raise ValueError(
            f"X and y must have the same number of samples "
            f"(got {total_length} and {len(y)})"
        )
    # The small tolerance keeps ratio pairs that sum to 1 only up to
    # floating-point error from being rejected.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1"
        )

    def _boundary(fraction):
        # round() absorbs binary floating-point error before truncation:
        # 100 * (0.7 + 0.2) evaluates to 89.99999999999999, which a bare
        # int() would turn into 89 and silently shrink the validation part.
        return min(total_length, int(round(total_length * fraction, 9)))

    # Compute end indices for each split
    train_end = _boundary(train_ratio)
    val_end = _boundary(train_ratio + val_ratio)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]

    return X_train, y_train, X_val, y_val, X_test, y_test

# Split in order with the default ratios: 70% train, 20% validation and the
# remainder as test.
X_train, y_train, X_val, y_val, X_test, y_test = time_series_split(X, y)

In [6]:
def transformer_encoder(inputs, d_model, num_heads, ff_dim, dropout=0.1):
    """Apply one Transformer encoder block to ``inputs``.

    The block is multi-head self-attention followed by a two-layer
    feed-forward network. Each sub-layer output goes through dropout, is
    added to the sub-layer input (residual connection) and is then
    layer-normalised.

    Args:
        inputs: Keras tensor whose last dimension equals ``d_model`` so that
            the residual addition after the feed-forward network is
            shape-compatible.
        d_model: Output width of the feed-forward network; also passed as
            ``key_dim`` to the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        dropout: Dropout rate applied to both sub-layer outputs. Dropout is
            only active during training.

    Returns:
        Keras tensor with the same shape as ``inputs``.
    """
    # Self attention
    attention = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)(inputs, inputs, inputs)
    # The `dropout` argument used to be accepted but ignored. It is applied to
    # the sub-layer output rather than to the attention weights so that no
    # extra mask the size of the score tensor is needed.
    attention = tf.keras.layers.Dropout(dropout)(attention)
    attn_output = tf.keras.layers.Add()([attention, inputs])
    out1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(attn_output)

    # Feed-forward network
    ffn_output = tf.keras.models.Sequential([
        tf.keras.layers.Dense(ff_dim, activation="relu"),
        tf.keras.layers.Dense(d_model),
    ])(out1)
    ffn_output = tf.keras.layers.Dropout(dropout)(ffn_output)
    out2 = tf.keras.layers.Add()([ffn_output, out1])
    return tf.keras.layers.LayerNormalization(epsilon=1e-6)(out2)

def create_transformer_model(input_shape=(5, 149, 181, 1)):
    """Build the ConvLSTM -> self-attention -> upsampling model.

    A ConvLSTM2D layer collapses the time axis, the feature map is pooled by
    2 in each spatial direction, one Transformer encoder block attends over
    all pooled positions, and the result is upsampled back to the input
    height and width before a final 3x3 convolution produces one channel.

    Args:
        input_shape: ``(time, height, width, channels)`` without the batch
            axis. Height and width must be known integers; odd sizes are
            zero-padded by one before pooling and cropped again afterwards.

    Returns:
        An uncompiled Keras model whose output shape is
        ``(batch, height, width, 1)``.
    """
    _, height, width, _ = input_shape

    # 2x2 pooling followed by 2x upsampling only round-trips even sizes, so an
    # odd height/width gets one row/column of padding that is cropped again
    # at the end. For the default 149x181 input this yields 75x91 after
    # pooling, exactly as before.
    pad_h, pad_w = height % 2, width % 2
    pooled_h = (height + pad_h) // 2
    pooled_w = (width + pad_w) // 2

    # Transformer layer with fewer dimensions
    d_model = 16
    num_heads = 2
    ff_dim = 32

    inputs = tf.keras.layers.Input(shape=input_shape)

    # ConvLSTM layer with fewer filters; the filter count doubles as d_model so
    # each spatial position becomes one d_model-wide token.
    x = tf.keras.layers.ConvLSTM2D(filters=d_model, kernel_size=(3, 3),
                                   padding='same', return_sequences=False)(inputs)
    x = tf.keras.layers.BatchNormalization()(x)

    # Asymmetric padding after ConvLSTM
    x = tf.keras.layers.ZeroPadding2D(padding=((0, pad_h), (0, pad_w)))(x)

    # Max pooling to reduce spatial dimensions
    x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)

    # Self-attention runs over all pooled_h * pooled_w positions (6825 for the
    # default shape), so the attention score tensor grows with the square of
    # that count.
    x = tf.keras.layers.Reshape((-1, d_model))(x)
    x = transformer_encoder(x, d_model, num_heads, ff_dim)
    x = tf.keras.layers.Reshape((pooled_h, pooled_w, d_model))(x)

    # Upsample layer to match desired output size
    x = tf.keras.layers.UpSampling2D(size=(2, 2))(x)

    # Cropping layer to remove the padding added before pooling
    x = tf.keras.layers.Cropping2D(cropping=((0, pad_h), (0, pad_w)))(x)

    # Output Conv2D layer
    outputs = tf.keras.layers.Conv2D(filters=1, kernel_size=(3, 3), padding='same', activation='linear')(x)

    return tf.keras.models.Model(inputs=inputs, outputs=outputs)

model = create_transformer_model()

In [7]:
# Print the layer-by-layer table (output shapes, parameter counts, connections).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 5, 149, 181, 1)]     0         []                            
                                                                                                  
 conv_lstm2d (ConvLSTM2D)    (None, 149, 181, 16)         9856      ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 149, 181, 16)         64        ['conv_lstm2d[0][0]']         
 Normalization)                                                                                   
                                                                                                  
 zero_padding2d (ZeroPaddin  (None, 150, 182, 16)         0         ['batch_normalization[0][0

In [8]:
# Optimise mean-squared error with Adam; 'mse' is additionally reported as a metric.
model.compile(optimizer='adam', loss='mse', metrics=['mse'])

# Stop after 5 epochs without improvement of the monitored quantity (library
# default) and restore the best weights seen.
early_stop = EarlyStopping(patience=5, restore_best_weights=True)

# NOTE(review): with 75 * 91 = 6825 attention positions and 2 heads, a batch of
# 32 gives 32 * 2 * 6825**2 * 4 = 11,924,640,000 bytes per float32 score
# tensor, which is exactly the allocation size in the warnings recorded below.
# Presumably the attention scores are what exhausts memory; consider a
# smaller batch size.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=1024)
    .batch(32)
)

# Validation batches keep their original order.
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(32)

history = model.fit(
    train_dataset,
    epochs=20,
    validation_data=val_dataset,
    callbacks=[early_stop],
)

Epoch 1/20


2023-08-10 15:37:11.875543: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 11924640000 exceeds 10% of free system memory.
2023-08-10 15:37:12.871694: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 11924640000 exceeds 10% of free system memory.
2023-08-10 15:37:18.694906: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 11924640000 exceeds 10% of free system memory.
2023-08-10 15:37:19.806468: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 11924640000 exceeds 10% of free system memory.


  1/168 [..............................] - ETA: 54:26 - loss: 1.8855 - mse: 1.8855

2023-08-10 15:37:27.779378: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 11924640000 exceeds 10% of free system memory.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
