In [1]:
import pandas as pd
import polars 
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import math
import pytorch_lightning as pl
from torchmetrics.regression import R2Score
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from models.datasets import *
from models.fcn import *

In [27]:
df = polars.read_csv('data/train.csv')
test_df = polars.read_csv('data/test.csv')
ss = polars.read_csv('data/sample_submission.csv', n_rows=1)

In [3]:
SRC_COLS = df.columns[1:557]
TGT_COLS = df.columns[557:]

for col in SRC_COLS:
    df = df.with_columns(polars.col(col).cast(polars.Float64))
    test_df = test_df.with_columns(polars.col(col).cast(polars.Float64))

for col in TGT_COLS:
    df = df.with_columns(polars.col(col).cast(polars.Float64))
    ss = ss.with_columns(polars.col(col).cast(polars.Float64))

In [4]:
w = torch.tensor(ss.select(TGT_COLS).to_numpy(), dtype=torch.float64)
w = w.squeeze(0)
# torch.save(w, 'data/weight.pt')

In [5]:
src = torch.tensor(df.select(SRC_COLS).to_numpy(), dtype=torch.float64)
test_src = torch.tensor(test_df.select(SRC_COLS).to_numpy(), dtype=torch.float64)
label = torch.tensor(df.select(TGT_COLS).to_numpy(), dtype=torch.float64)

In [6]:
src_ = torch.cat([src, test_src], dim=0)
src_mu = src_.mean(axis=0)
src_std = src_.std(axis=0)
test_src = torch.where(src_std > 0, (test_src - src_mu) / src_std, 0)

In [7]:
label *= w
label_mu = label.mean(axis=0)
label_std = label.std(axis=0)
label = torch.where(label_std > 0, (label - label_mu) / label_std, 0)

In [8]:
input_seq_name = [
    "state_t", "state_q0001", "state_q0002", "state_q0003", "state_u", "state_v", "pbuf_ozone", "pbuf_CH4", "pbuf_N2O"
]
input_scl_name = [
    "state_ps", "pbuf_SOLIN", "pbuf_LHFLX", "pbuf_SHFLX", "pbuf_TAUX", "pbuf_TAUY", "pbuf_COSZRS", "cam_in_ALDIF", "cam_in_ALDIR", "cam_in_ASDIF", "cam_in_ASDIR", "cam_in_LWUP", "cam_in_ICEFRAC", "cam_in_LANDFRAC", "cam_in_OCNFRAC", "cam_in_SNOWHLAND"
]
input_seq_idx = [[idx - 1 for idx, column in enumerate(df.columns) if 
                    column.startswith(var)] for var in input_seq_name]
input_scl_idx = [[idx - 1 for idx, column in enumerate(df.columns) if 
                    column.startswith(var)] for var in input_scl_name]
test_seq = torch.stack([test_src[:, i] for i in input_seq_idx], dim=-1)
test_scl = torch.stack([test_src[:, i].repeat(1, 60) for i in input_scl_idx], dim=-1)
input_dim = test_seq.size(-1)
scalar_dim = test_scl.size(-1)

In [9]:
# src = torch.where(src_std > 0, (src - src_mu) / src_std, 0)
# seq = torch.stack([src[:, i] for i in input_seq_idx], dim=-1)
# scl = torch.stack([src[:, i].repeat(1, 60) for i in input_scl_idx], dim=-1)
# torch.save(seq, "./data/seq.pt")
# torch.save(scl, "./data/scl.pt")
# torch.save(label, "./data/labeln.pt")
# torch.save(label_std, "./data/labelstd.pt")

In [10]:
test_ds = MixTestDataset(test_seq, test_scl)
test_loader = DataLoader(test_ds, batch_size=1024, shuffle=False)

In [23]:
ckpt = "/m9400/users/lkv6309/leap/ckpt/roks-epoch=54-val_score=0.704.ckpt"
model = RoFCN.load_from_checkpoint(ckpt).double()
# model = TransformerFilteredModel.load_from_checkpoint(ckpt)
checkpoint_callback = ModelCheckpoint(
    dirpath='ckpt/',
    filename='te-base-{epoch:02d}-{val_loss:.2f}',
    save_top_k=-1,
    monitor='val_loss',
    mode='min'
)
logger = TensorBoardLogger(save_dir="logger")
trainer = Trainer(
    logger=logger,
    callbacks=[checkpoint_callback],
    max_epochs=10,
    accelerator="gpu",
    devices=[0]
)

/m9400/users/lkv6309/miniconda3/envs/rise/lib/python3.11/site-packages/pytorch_lightning/utilities/migration/utils.py:56: The loaded checkpoint was produced with Lightning v2.2.3, which is newer than your current Lightning version: v2.2.1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [24]:
torch.set_float32_matmul_precision('high')
pred = trainer.predict(model, test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
/m9400/users/lkv6309/miniconda3/envs/rise/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=111` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

In [25]:
preds = torch.cat(pred)
for i in range(label_std.shape[0]):
    if w[i] == 0:
        preds[:,i] = 0
preds = preds * label_std + label_mu

In [22]:
print(preds)

tensor([[-1.7817e-06, -5.5111e-05, -7.7009e-05,  ...,  3.5442e-01,
         -3.1319e+00, -2.1852e+00],
        [-1.5716e-05, -4.9336e-05, -3.9350e-05,  ..., -1.7261e+00,
         -1.4165e+00, -2.9946e-01],
        [-5.1268e-06, -7.3293e-05, -3.1420e-05,  ..., -5.8942e+00,
         -6.9565e-01,  1.2907e-01],
        ...,
        [ 3.6679e-05,  4.3801e-06,  1.6792e-05,  ...,  1.9121e+02,
          6.2625e+01,  1.6884e+01],
        [-1.9573e-05, -1.8866e-05, -3.4623e-05,  ...,  2.4550e+00,
         -1.3021e+00, -1.6688e+00],
        [ 4.0301e-05,  3.2533e-05,  6.7218e-05,  ...,  5.5086e+01,
          4.4161e+01,  2.3757e+01]], dtype=torch.float64)


In [15]:
# df1 = pd.read_csv('55.csv')
# preds = df1.iloc[:, 1:].to_numpy()

In [28]:
ss = pd.read_csv("./data/sample_submission.csv")
ss.iloc[:,1:] = preds.numpy()
use_cols = []
for i in range(27):
    use_cols.append(f"ptend_q0002_{i}")
ss2 = pd.read_csv("./data/sample_submission.csv")
test_df = test_df.to_pandas()
for col in use_cols:
    ss[col] = - test_df[col.replace("ptend", "state")] * ss2[col] / 1200.

# use_cols = []
# for i in range(27):
#     use_cols.append(f"ptend_q0002_{i}")
# test_df = test_df.to_pandas()
# for col in use_cols:
#     ss[col] = - test_df[col.replace("ptend", "state")] / 1200.

test_polars = polars.from_pandas(ss[["sample_id"]+TGT_COLS])
test_polars.write_csv("submission.csv")

 -1.64722610e-05  3.70975542e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -1.61013183e-05  2.60690505e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -3.57836309e-05  7.13677673e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -5.50153158e-05  9.02534316e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -8.38426665e-05  1.04211870e-04]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -1.11405222e-04  9.13805813e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -9.89648120e-05  6.48986999e-05]' has dtype incompatible with int64, please

In [29]:
! kaggle competitions submit -c leap-atmospheric-physics-ai-climsim -f submission.csv -m "roks 704"

100%|██████████████████████████████████████| 4.19G/4.19G [00:58<00:00, 76.9MB/s]
Successfully submitted to LEAP - Atmospheric Physics using AI (ClimSim)