In [1]:
import os
import numpy as np

# 1. Get the current working directory
current_dir = os.getcwd()

# Load the memory-mapped tensor
super_tensor_file = os.path.join(current_dir, "super_emissions_tensor.npy")
molecule_types = ["CH4", "CO2", "CO2bio", "GWP", "N2O"]
data_types = ["emi", "flx"]

# Load the super tensor
super_tensor_shape = (5, 2, 288, 1800, 3600)
super_tensor = np.memmap(super_tensor_file, dtype='float32', mode='r', shape=super_tensor_shape)

In [2]:
super_tensor.shape

(5, 2, 288, 1800, 3600)

In [3]:
import numpy as np

# Assuming you have already loaded your memory-mapped tensor as:
# super_tensor = np.memmap(super_tensor_file, dtype='float32', mode='r', shape=(5, 2, 288, 1800, 3600))

# Reshape the last two dimensions so that:
# 1800 -> (600, 3) and 3600 -> (1200, 3)
reshaped = super_tensor.reshape(5, 2, 288, 600, 3, 1200, 3)

# Compute the mean over the 3x3 blocks (axis 4 and axis 6)
kernelized_tensor = reshaped.mean(axis=(4, 6))

print(kernelized_tensor.shape)  # Expected shape: (5, 2, 288, 600, 1200)

(5, 2, 288, 600, 1200)


In [4]:
np.save("kernelized_tensor.npy", kernelized_tensor)

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress INFO and WARNING messages

In [2]:
import os
import numpy as np

# 1. Get the current working directory
current_dir = os.getcwd()

# Load the memory-mapped tensor
super_tensor_file = os.path.join(current_dir, "kernelized_tensor.npy")
molecule_types = ["CH4", "CO2", "CO2bio", "GWP", "N2O"]
data_types = ["emi", "flx"]

# Load the super tensor
super_tensor_shape = (5, 2, 288, 600, 1200)
kernelized_tensor = np.memmap(super_tensor_file, dtype='float32', mode='r', shape=super_tensor_shape)

print(kernelized_tensor.shape)

(5, 2, 288, 600, 1200)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Optionally enable mixed precision if using GPUs
if tf.config.list_physical_devices('GPU'):
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    print("Mixed precision enabled")

# --- Data Dimensions ---
num_molecules = 5         # CH4, CO2, CO2bio, GWP, N2O
num_time = 288            # 12 months * 24 years
lat_full = 600            # Full kernelized latitude dimension
lon_full = 1200           # Full kernelized longitude dimension

# We'll work in a lower resolution for this baseline:
low_res_h, low_res_w = 75, 150
output_channels = 5       # one channel per molecule

# --- Use preloaded tensor ---
# Assume 'kernelized_tensor' is loaded and has shape (5, 2, 288, 600, 1200)
data = kernelized_tensor

# Select emissions only (data type index 0); resulting shape: (5, 288, 600, 1200)
data_emissions = data[:, 0, :, :, :]

# Rearrange dimensions so that time is first: (288, 5, 600, 1200)
emissions_data = np.transpose(data_emissions, (1, 0, 2, 3))

# Convert targets to float32 and reshape to (num_time, 5, 600, 1200)
Y_full = emissions_data.astype(np.float32).reshape(num_time, num_molecules, lat_full, lon_full)

# --- Downsample Targets ---
# Downsample from (600, 1200) to (75, 150) using block averaging.
Y_full = np.transpose(Y_full, (0, 2, 3, 1))  # Now shape: (num_time, 600, 1200, 5)
Y_low = np.empty((num_time, low_res_h, low_res_w, output_channels), dtype=np.float32)
for i in range(num_time):
    # Average over each 8x8 block (600/75=8 and 1200/150=8)
    Y_low[i] = Y_full[i].reshape(low_res_h, 8, low_res_w, 8, output_channels).mean(axis=(1, 3))

# --- Log Transformation ---
# Apply log1p to compress the wide dynamic range (assumes targets are nonnegative)
Y_low_log = np.log1p(Y_low)

# --- Prepare Time Features ---
# Encode the month cyclically (this helps capture seasonal/periodic patterns).
months = np.arange(num_time, dtype=np.float32)
month_norm = months / (num_time - 1)
X = np.stack([np.sin(2 * np.pi * month_norm), np.cos(2 * np.pi * month_norm)], axis=1)
# X shape: (num_time, 2)

# --- Train/Test Split ---
train_time = 48  # First 4 years
test_time = 12   # Next 12 months
X_train = X[:train_time]
X_test = X[train_time:train_time + test_time]
Y_train = Y_low_log[:train_time]  # shape: (48, 75, 150, 5)
Y_test = Y_low_log[train_time:train_time + test_time]

print("Training samples:", X_train.shape[0])
print("Test samples:", X_test.shape[0])
print("Target (log-transformed, low-res) shape per sample:", Y_train.shape[1:])  # Should be (75,150,5)

# --- Normalize Log-Transformed Targets ---
Y_train_mean = np.mean(Y_train, dtype=np.float32)
Y_train_std = np.std(Y_train, dtype=np.float32)
print("Y_train_mean (log scale):", Y_train_mean)
print("Y_train_std (log scale):", Y_train_std)
Y_train_norm = (Y_train - Y_train_mean) / Y_train_std
Y_test_norm = (Y_test - Y_train_mean) / Y_train_std

# --- Custom Callback ---
class ProgressCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        loss = logs.get('loss')
        print(f"Epoch {epoch+1:03d}: Loss = {loss:.6f}")
progress_callback = ProgressCallback()

# --- GPU Setup ---
strategy = tf.distribute.MirroredStrategy()
print(f"Number of devices: {strategy.num_replicas_in_sync}")

with strategy.scope():
    # Build a simple MLP model (this is an MLP, not a DenseNet).
    # MLP stands for Multi-Layer Perceptron: it consists of fully connected (dense) layers.
    model = models.Sequential([
        layers.Input(shape=(2,)),  # Input: 2 features (sin and cos)
        layers.Dense(256, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(256, activation='relu'),
        # Output a vector that reshapes to (75, 150, 5)
        layers.Dense(low_res_h * low_res_w * output_channels, activation='relu'),
        layers.Reshape((low_res_h, low_res_w, output_channels))
    ])
    optimizer = Adam(learning_rate=1e-7, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mse')
    model.summary()

# --- Train the Model ---
history = model.fit(
    X_train, Y_train_norm,
    epochs=20000,
    batch_size=1,
    verbose=0,
    callbacks=[progress_callback]
)

# --- Plot Training Loss History ---
plt.figure(figsize=(8, 4))
plt.plot(history.history['loss'], marker='o')
plt.xlabel('Epoch')
plt.ylabel('Training Loss (MSE)')
plt.title('Training Loss Progress')
plt.grid(True)
plt.show()

# --- Evaluate the Model ---
Y_pred_norm = model.predict(X_test)
test_mse_norm = mean_squared_error(Y_test_norm.flatten(), Y_pred_norm.flatten())
print("Test MSE (normalized):", test_mse_norm)

# Inverse transform predictions:
Y_pred_log = Y_pred_norm * Y_train_std + Y_train_mean
# Invert the log transform using expm1 to recover original (low-resolution) scale
Y_pred = np.expm1(Y_pred_log)
Y_test_inv = np.expm1(Y_test_norm * Y_train_std + Y_train_mean)

# --- Compute Additional Metrics ---

# Standard metrics:
mse = mean_squared_error(Y_test_inv.flatten(), Y_pred.flatten())
rmse = np.sqrt(mse)
mae = mean_absolute_error(Y_test_inv.flatten(), Y_pred.flatten())
r2 = r2_score(Y_test_inv.flatten(), Y_pred.flatten())
corr = np.corrcoef(Y_test_inv.flatten(), Y_pred.flatten())[0, 1]

# Define a custom "precision" for regression:
# Here we count the fraction of predictions whose relative error is below a threshold (e.g., 10%).
def regression_precision(y_true, y_pred, threshold=0.1):
    epsilon = 1e-8  # small constant to avoid division by zero
    relative_error = np.abs(y_true - y_pred) / (np.abs(y_true) + epsilon)
    return np.mean(relative_error < threshold)

precision = regression_precision(Y_test_inv.flatten(), Y_pred.flatten(), threshold=0.1)

print("MSE:", mse)
print("RMSE:", rmse)
print("MAE:", mae)
print("R^2:", r2)
print("Pearson correlation coefficient:", corr)
print("Regression Precision (within 10% error):", precision)

# --- Visualize a Sample Prediction ---
sample_idx = 0  # first test sample
molecule_idx = 0  # visualize molecule 0

plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.imshow(Y_test_inv[sample_idx, :, :, molecule_idx], cmap='viridis')
plt.title('Ground Truth (Molecule 0)')
plt.colorbar()

plt.subplot(1, 2, 2)
plt.imshow(Y_pred[sample_idx, :, :, molecule_idx], cmap='viridis')
plt.title('Prediction (Molecule 0)')
plt.colorbar()

plt.tight_layout()
plt.show()


Mixed precision enabled
Training samples: 48
Test samples: 12
Target (log-transformed, low-res) shape per sample: (75, 150, 5)
Y_train_mean (log scale): 0.19016193
Y_train_std (log scale): 0.7214341
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1


Epoch 001: Loss = 1.000012
Epoch 002: Loss = 1.000005
Epoch 003: Loss = 0.999997
Epoch 004: Loss = 0.999989


2025-03-08 15:17:28.252377: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]]


Epoch 005: Loss = 0.999981
Epoch 006: Loss = 0.999972
Epoch 007: Loss = 0.999964
Epoch 008: Loss = 0.999955
Epoch 009: Loss = 0.999947
Epoch 010: Loss = 0.999938
Epoch 011: Loss = 0.999929
Epoch 012: Loss = 0.999921
Epoch 013: Loss = 0.999912
Epoch 014: Loss = 0.999903
Epoch 015: Loss = 0.999894
Epoch 016: Loss = 0.999885
Epoch 017: Loss = 0.999876
Epoch 018: Loss = 0.999867
Epoch 019: Loss = 0.999859
Epoch 020: Loss = 0.999849


2025-03-08 15:17:33.975019: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]]


Epoch 021: Loss = 0.999840
Epoch 022: Loss = 0.999831
Epoch 023: Loss = 0.999822
Epoch 024: Loss = 0.999813
Epoch 025: Loss = 0.999804
Epoch 026: Loss = 0.999794
Epoch 027: Loss = 0.999785
Epoch 028: Loss = 0.999776
Epoch 029: Loss = 0.999766
Epoch 030: Loss = 0.999757
Epoch 031: Loss = 0.999747
Epoch 032: Loss = 0.999738
Epoch 033: Loss = 0.999728
Epoch 034: Loss = 0.999719
Epoch 035: Loss = 0.999709
Epoch 036: Loss = 0.999700
Epoch 037: Loss = 0.999690
Epoch 038: Loss = 0.999680
Epoch 039: Loss = 0.999670
Epoch 040: Loss = 0.999660
Epoch 041: Loss = 0.999650
Epoch 042: Loss = 0.999640
Epoch 043: Loss = 0.999631
Epoch 044: Loss = 0.999620
Epoch 045: Loss = 0.999611
Epoch 046: Loss = 0.999601
Epoch 047: Loss = 0.999591
Epoch 048: Loss = 0.999581
Epoch 049: Loss = 0.999570
Epoch 050: Loss = 0.999560
Epoch 051: Loss = 0.999550
Epoch 052: Loss = 0.999539


2025-03-08 15:17:45.602818: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]]


Epoch 053: Loss = 0.999529
Epoch 054: Loss = 0.999519
Epoch 055: Loss = 0.999509
Epoch 056: Loss = 0.999498
Epoch 057: Loss = 0.999488
Epoch 058: Loss = 0.999478
Epoch 059: Loss = 0.999467
Epoch 060: Loss = 0.999456
Epoch 061: Loss = 0.999446
Epoch 062: Loss = 0.999435
Epoch 063: Loss = 0.999425
Epoch 064: Loss = 0.999414
Epoch 065: Loss = 0.999403
Epoch 066: Loss = 0.999392
Epoch 067: Loss = 0.999381
Epoch 068: Loss = 0.999371
Epoch 069: Loss = 0.999360
Epoch 070: Loss = 0.999349
Epoch 071: Loss = 0.999338
Epoch 072: Loss = 0.999327
Epoch 073: Loss = 0.999317
Epoch 074: Loss = 0.999306
Epoch 075: Loss = 0.999295
Epoch 076: Loss = 0.999284
Epoch 077: Loss = 0.999273
Epoch 078: Loss = 0.999262
Epoch 079: Loss = 0.999251
Epoch 080: Loss = 0.999239
Epoch 081: Loss = 0.999228
Epoch 082: Loss = 0.999217
Epoch 083: Loss = 0.999206
Epoch 084: Loss = 0.999195
Epoch 085: Loss = 0.999184
Epoch 086: Loss = 0.999172
Epoch 087: Loss = 0.999161
Epoch 088: Loss = 0.999150
Epoch 089: Loss = 0.999138
E

2025-03-08 15:18:08.608252: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]]


Epoch 117: Loss = 0.998812
Epoch 118: Loss = 0.998800
Epoch 119: Loss = 0.998788
Epoch 120: Loss = 0.998776
Epoch 121: Loss = 0.998764
Epoch 122: Loss = 0.998752
Epoch 123: Loss = 0.998740
Epoch 124: Loss = 0.998728
Epoch 125: Loss = 0.998716
Epoch 126: Loss = 0.998703
Epoch 127: Loss = 0.998691
Epoch 128: Loss = 0.998679
Epoch 129: Loss = 0.998667
Epoch 130: Loss = 0.998654
Epoch 131: Loss = 0.998642
Epoch 132: Loss = 0.998630
Epoch 133: Loss = 0.998618
Epoch 134: Loss = 0.998605
Epoch 135: Loss = 0.998593
Epoch 136: Loss = 0.998581
Epoch 137: Loss = 0.998568
Epoch 138: Loss = 0.998556
Epoch 139: Loss = 0.998544
Epoch 140: Loss = 0.998531
Epoch 141: Loss = 0.998519
Epoch 142: Loss = 0.998506
Epoch 143: Loss = 0.998493
Epoch 144: Loss = 0.998481
Epoch 145: Loss = 0.998469
Epoch 146: Loss = 0.998456
Epoch 147: Loss = 0.998443
Epoch 148: Loss = 0.998431
Epoch 149: Loss = 0.998418
Epoch 150: Loss = 0.998406
Epoch 151: Loss = 0.998393
Epoch 152: Loss = 0.998380
Epoch 153: Loss = 0.998367
E

2025-03-08 15:18:54.597978: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]]


Epoch 245: Loss = 0.997102
Epoch 246: Loss = 0.997088
Epoch 247: Loss = 0.997072
Epoch 248: Loss = 0.997058
Epoch 249: Loss = 0.997043
Epoch 250: Loss = 0.997028
Epoch 251: Loss = 0.997013
Epoch 252: Loss = 0.996998
Epoch 253: Loss = 0.996983
Epoch 254: Loss = 0.996968
Epoch 255: Loss = 0.996953
Epoch 256: Loss = 0.996938
Epoch 257: Loss = 0.996922
Epoch 258: Loss = 0.996907
Epoch 259: Loss = 0.996892
Epoch 260: Loss = 0.996877
Epoch 261: Loss = 0.996862
Epoch 262: Loss = 0.996846
Epoch 263: Loss = 0.996831
Epoch 264: Loss = 0.996816
Epoch 265: Loss = 0.996800
Epoch 266: Loss = 0.996785
Epoch 267: Loss = 0.996770
Epoch 268: Loss = 0.996754
Epoch 269: Loss = 0.996739
Epoch 270: Loss = 0.996723
Epoch 271: Loss = 0.996707
Epoch 272: Loss = 0.996692
Epoch 273: Loss = 0.996676
Epoch 274: Loss = 0.996661
Epoch 275: Loss = 0.996645
Epoch 276: Loss = 0.996629
Epoch 277: Loss = 0.996614
Epoch 278: Loss = 0.996598
Epoch 279: Loss = 0.996582
Epoch 280: Loss = 0.996567
Epoch 281: Loss = 0.996551
E

2025-03-08 15:20:27.086768: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]]


Epoch 501: Loss = 0.992203
Epoch 502: Loss = 0.992179
Epoch 503: Loss = 0.992154
Epoch 504: Loss = 0.992131
Epoch 505: Loss = 0.992106
Epoch 506: Loss = 0.992082
Epoch 507: Loss = 0.992057
Epoch 508: Loss = 0.992032
Epoch 509: Loss = 0.992008
Epoch 510: Loss = 0.991984
Epoch 511: Loss = 0.991959
Epoch 512: Loss = 0.991934
Epoch 513: Loss = 0.991909
Epoch 514: Loss = 0.991884
Epoch 515: Loss = 0.991859
Epoch 516: Loss = 0.991835
Epoch 517: Loss = 0.991810
Epoch 518: Loss = 0.991785
Epoch 519: Loss = 0.991760
Epoch 520: Loss = 0.991735
Epoch 521: Loss = 0.991710
Epoch 522: Loss = 0.991685
Epoch 523: Loss = 0.991660
Epoch 524: Loss = 0.991635
Epoch 525: Loss = 0.991610
Epoch 526: Loss = 0.991585
Epoch 527: Loss = 0.991560
Epoch 528: Loss = 0.991534
Epoch 529: Loss = 0.991509
Epoch 530: Loss = 0.991484
Epoch 531: Loss = 0.991458
Epoch 532: Loss = 0.991433
Epoch 533: Loss = 0.991407
Epoch 534: Loss = 0.991381
Epoch 535: Loss = 0.991356
Epoch 536: Loss = 0.991329
Epoch 537: Loss = 0.991304
E

2025-03-08 15:23:32.037817: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]]


Epoch 1013: Loss = 0.973046
Epoch 1014: Loss = 0.972993
Epoch 1015: Loss = 0.972940
Epoch 1016: Loss = 0.972887
Epoch 1017: Loss = 0.972833
Epoch 1018: Loss = 0.972779
Epoch 1019: Loss = 0.972725
Epoch 1020: Loss = 0.972672
Epoch 1021: Loss = 0.972618
Epoch 1022: Loss = 0.972566
Epoch 1023: Loss = 0.972513
Epoch 1024: Loss = 0.972459
Epoch 1025: Loss = 0.972407
Epoch 1026: Loss = 0.972353
Epoch 1027: Loss = 0.972299
Epoch 1028: Loss = 0.972244
Epoch 1029: Loss = 0.972190
Epoch 1030: Loss = 0.972136
Epoch 1031: Loss = 0.972081
Epoch 1032: Loss = 0.972026
Epoch 1033: Loss = 0.971970
Epoch 1034: Loss = 0.971916
Epoch 1035: Loss = 0.971862
Epoch 1036: Loss = 0.971808
Epoch 1037: Loss = 0.971753
Epoch 1038: Loss = 0.971699
Epoch 1039: Loss = 0.971645
Epoch 1040: Loss = 0.971590
Epoch 1041: Loss = 0.971535
Epoch 1042: Loss = 0.971480
Epoch 1043: Loss = 0.971425
Epoch 1044: Loss = 0.971370
Epoch 1045: Loss = 0.971316
Epoch 1046: Loss = 0.971261
Epoch 1047: Loss = 0.971206
Epoch 1048: Loss = 0

2025-03-08 15:29:41.608158: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
	 [[RemoteCall]]


Epoch 2037: Loss = 0.875203
Epoch 2038: Loss = 0.875062
Epoch 2039: Loss = 0.874918
Epoch 2040: Loss = 0.874777
Epoch 2041: Loss = 0.874633
Epoch 2042: Loss = 0.874488
Epoch 2043: Loss = 0.874345
Epoch 2044: Loss = 0.874202
Epoch 2045: Loss = 0.874056
Epoch 2046: Loss = 0.873909
Epoch 2047: Loss = 0.873764
Epoch 2048: Loss = 0.873615
Epoch 2049: Loss = 0.873474
Epoch 2050: Loss = 0.873330
Epoch 2051: Loss = 0.873186
Epoch 2052: Loss = 0.873041
Epoch 2053: Loss = 0.872899
Epoch 2054: Loss = 0.872756
Epoch 2055: Loss = 0.872613
Epoch 2056: Loss = 0.872469
Epoch 2057: Loss = 0.872327
Epoch 2058: Loss = 0.872181
Epoch 2059: Loss = 0.872036
Epoch 2060: Loss = 0.871890
Epoch 2061: Loss = 0.871745
Epoch 2062: Loss = 0.871602
Epoch 2063: Loss = 0.871458
Epoch 2064: Loss = 0.871312
Epoch 2065: Loss = 0.871163
Epoch 2066: Loss = 0.871017
Epoch 2067: Loss = 0.870866
Epoch 2068: Loss = 0.870720
Epoch 2069: Loss = 0.870574
Epoch 2070: Loss = 0.870429
Epoch 2071: Loss = 0.870278
Epoch 2072: Loss = 0

In [3]:
import tensorflow as tf
tf.keras.backend.clear_session()

import gc
gc.collect()

0