In [1]:
import polars
import pandas
import torch
import lightning.pytorch as pl
from torch.utils.data import Dataset, DataLoader, random_split
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger

from datasets import LEAPTestDataset
from ptlit import PTLit
from models.jnet import JLNet

In [2]:
label_std = torch.load("../data/labelnstd.pt").to(torch.float64)
label_mu = torch.load("../data/labelnmu.pt").to(torch.float64)
test_seq = torch.load("../data/test_seq_p.pt").to(torch.float64)
test_scl = torch.load("../data/test_scl_p.pt").to(torch.float64)
mask = torch.load("../data/weight.pt").to(torch.float32).bool()

In [3]:
test_ds = LEAPTestDataset(test_seq, test_scl)
test_loader = DataLoader(test_ds, batch_size=1024, shuffle=False)
mdlit = PTLit.load_from_checkpoint("../ckpt/em/tri-n-epoch=15-val_score=0.761.ckpt").double()

/m9400/users/lkv6309/miniconda3/envs/rise/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'models' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['models'])`.


In [4]:
checkpoint_callback = ModelCheckpoint(
    dirpath='ckpt/',
    filename='te-base-{epoch:02d}-{val_loss:.2f}',
    save_top_k=-1,
    monitor='val_loss',
    mode='min'
)
logger = TensorBoardLogger(save_dir="logger")
trainer = pl.Trainer(
    logger=logger,
    callbacks=[checkpoint_callback],
    max_epochs=10,
    accelerator="gpu",
    devices=[6]
)
torch.set_float32_matmul_precision('high')
pred = trainer.predict(mdlit, test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
/m9400/users/lkv6309/miniconda3/envs/rise/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=111` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  return F.conv1d(input, weight, bias, self.stride,


In [5]:
preds = torch.cat(pred)
preds = preds * label_std + label_mu
preds *= mask.to(torch.float32)

In [6]:
df = polars.read_csv('~/leap/data/train.csv')
test_df = polars.read_csv('~/leap/data/test.csv')
ss = polars.read_csv('~/leap/data/sample_submission.csv', n_rows=1)
ss2 = polars.read_csv("~/leap/data/sample_submission.csv")

In [7]:
SRC_COLS = df.columns[1:557]
TGT_COLS = df.columns[557:]

for col in SRC_COLS:
    df = df.with_columns(polars.col(col).cast(polars.Float64))
    test_df = test_df.with_columns(polars.col(col).cast(polars.Float64))

for col in TGT_COLS:
    df = df.with_columns(polars.col(col).cast(polars.Float64))
    ss = ss.with_columns(polars.col(col).cast(polars.Float64))
    ss2 = ss2.with_columns(polars.col(col).cast(polars.Float64))

In [8]:
ss = pandas.read_csv("~/leap/data/sample_submission.csv")
ss.iloc[:,1:] = preds.numpy()
use_cols = []
for i in range(27):
    use_cols.append(f"ptend_q0002_{i}")

# test_df = test_df.to_pandas()
for col in use_cols:
    ss[col] = - test_df[col.replace("ptend", "state")] * ss2[col] / 1200.

test_polars = polars.from_pandas(ss[["sample_id"]+TGT_COLS])
test_polars.write_csv("../outputs/emn.csv")

 -1.67251363e-05  3.72212267e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -2.20209132e-05  2.58400856e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -3.90256420e-05  6.15977219e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -5.75693170e-05  7.10016438e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -8.67282083e-05  8.47557501e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -1.13818633e-04  7.97219338e-05]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  ss.iloc[:,1:] = preds.numpy()
 -1.02248803e-04  5.07218776e-05]' has dtype incompatible with int64, please

In [9]:
! kaggle competitions submit -c leap-atmospheric-physics-ai-climsim -f ../outputs/emn.csv -m "761"

100%|██████████████████████████████████████| 4.07G/4.07G [00:44<00:00, 97.1MB/s]
Successfully submitted to LEAP - Atmospheric Physics using AI (ClimSim)

: 