In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from utils.models.lstm import LSTM
from utils.metrics import growth_metric
from torch.optim import SGD, Adam


# Model

In [2]:
# Reproducibility: drive NumPy and PyTorch (CPU and CUDA) from one seed value.
seed = 2
np.random.seed(seed)
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(seed)

# Pick the compute device: CUDA when PyTorch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

Using cpu


## Load dataset

In [3]:
from utils.dataset import get_datasets

# Locations of the feature CSVs: the full history plus the two splits around 2022.
# Only `path_Full` is consumed in this cell.
path_Full = "data/Feature_data.csv"
path_Before2022 = "data/Before2022_Feature_data.csv"
path_From2022 = "data/From2022_Feature_data.csv"

# Build the train/validation datasets from the full file.
# NOTE(review): test_size=0.1 presumably holds out 10% for validation — confirm in utils.dataset.
full_split = get_datasets(path_Full, test_size=0.1)
train_dataset, val_dataset = full_split

data/Feature_data.csv
We have 0/325732 iters


In [None]:
# Sanity check: shape of the first element of the first training sample.
first_sample = train_dataset[0]
print(first_sample[0].shape)

torch.Size([10, 16])


In [5]:
BATCH_SIZE = 64

# Settings shared by both loaders; only the shuffling differs between them.
loader_kwargs = {"batch_size": BATCH_SIZE, "num_workers": 4}

# Training batches are reshuffled; validation batches keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [6]:
# Model, optimizer and loss
model = LSTM()
optimizer = Adam(model.parameters(), lr=0.01)
loss = F.mse_loss  # same function object as torch.nn.functional.mse_loss

# Experiment configuration. "epochs", "data_loader" and "val_data" are read back
# by the training cells further down; the whole dict is handed to Experiment.
cfg = dict(
    model=model,
    setup="train",
    loss=loss,
    optimizer=optimizer,
    epochs=10,
    data_loader=train_loader,
    val_data=val_loader,
)

In [7]:
from utils.experiments import Experiment

# Wrap the configuration in the project's Experiment object, which is what gets
# passed to `trainer.fit` below. This rebinds `model`: the LSTM instance created
# earlier is from here on only reachable through cfg["model"].
# NOTE(review): the recorded run output warns that
# `LightningModule.configure_optimizers` returned `None` ("this fit will run with
# no optimizer") — verify that Experiment actually returns cfg["optimizer"].
model = Experiment(cfg)

In [8]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, Callback, EarlyStopping

# Checkpointing keyed on the logged 'val_loss' metric; files are written to the
# current directory with the epoch number and validation loss in the name.
# NOTE(review): the Trainer cells visible in this notebook only register
# PrintCallback, so this callback appears to be unused — confirm whether it
# should be added to `callbacks`.
checkpoint_callback = ModelCheckpoint(
    dirpath='./',
    filename='{epoch:02d}-{val_loss:.2f}',
    monitor='val_loss',
)

class PrintCallback(pl.Callback):
    """Lightning callback that prints the logged losses after each phase.

    The values are read from ``trainer.callback_metrics``; the keys
    'train_loss', 'val_loss' and 'val_mse' must already have been logged,
    otherwise the lookup raises ``KeyError``.
    """

    def on_train_epoch_end(self, trainer, pl_module):
        """Print the training loss once a training epoch has finished."""
        logged = trainer.callback_metrics
        train_loss = logged['train_loss']
        print(f"Training loss: {train_loss}")

    def on_validation_end(self, trainer, pl_module):
        """Print validation loss and MSE once a validation run has finished."""
        logged = trainer.callback_metrics
        val_loss = logged['val_loss']
        val_mse = logged['val_mse']
        print(f"Validation loss: {val_loss}, Mse: {val_mse}")

# First, train on the data before 2022

In [9]:
# Stage 1 training run.
# Refresh the loader entries in cfg under the keys the config was created with
# ("data_loader" / "val_data"); these are the keys `trainer.fit` reads below.
# The previous code wrote to "val_loader", a key nothing in this notebook reads.
cfg["data_loader"] = train_loader
cfg["val_data"] = val_loader

# CPU-only trainer that reports losses through PrintCallback.
# EarlyStopping(monitor="val_loss", mode="min") can be appended to `callbacks`
# to stop once the validation loss no longer improves.
trainer = pl.Trainer(
    accelerator="cpu",
    max_epochs=cfg["epochs"],
    callbacks=[PrintCallback()],
)
trainer.fit(model, cfg["data_loader"], cfg["val_data"])

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/ggil/anaconda3/envs/hackathon/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


# Now fine-tune on the data from 2022 onward

In [10]:

# Fine-tuning run: continues training the same `model` object with a fresh Trainer.
# Loader entries are stored under the keys the config was created with
# ("data_loader" / "val_data"); the previous code wrote to "val_loader", a key
# nothing in this notebook reads.
# NOTE(review): `train_loader` / `val_loader` are the loaders built from
# `path_Full`, so this stage sees the same data as the previous fit. If the
# intent is to fine-tune on the 2022-onward split, build loaders from
# `path_From2022` and assign them here — confirm with the author.
cfg["data_loader"] = train_loader
cfg["val_data"] = val_loader

# CPU-only trainer that reports losses through PrintCallback.
# EarlyStopping(monitor="val_loss", mode="min") can be appended to `callbacks`
# to stop once the validation loss no longer improves.
trainer = pl.Trainer(
    accelerator="cpu",
    max_epochs=cfg["epochs"],
    callbacks=[PrintCallback()],
)
trainer.fit(model, cfg["data_loader"], cfg["val_data"])

/home/ggil/anaconda3/envs/hackathon/lib/python3.12/site-packages/pytorch_lightning/core/optimizer.py:181: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name  | Type | Params
-------------------------------
0 | model | LSTM | 809 K 
-------------------------------
809 K     Trainable params
0         Non-trainable params
809 K     Total params
3.238     Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 59.65it/s]Validation loss: 2.1165753453510656e+17, Mse: 2.1165751735523738e+17
Epoch 0: 100%|██████████| 442/442 [00:07<00:00, 55.80it/s, v_num=11]       Validation loss: 1.4052377937261363e+17, Mse: 1.4052377937261363e+17
Epoch 0: 100%|██████████| 442/442 [00:08<00:00, 52.14it/s, v_num=11, val_loss=1.41e+17]Training loss: 324824987598848.0
Epoch 1: 100%|██████████| 442/442 [00:08<00:00, 54.73it/s, v_num=11, val_loss=1.41e+17]Validation loss: 1.4052377937261363e+17, Mse: 1.4052377937261363e+17
Epoch 1: 100%|██████████| 442/442 [00:08<00:00, 51.49it/s, v_num=11, val_loss=1.41e+17]Training loss: 8.910720545377485e+16
Epoch 2: 100%|██████████| 442/442 [00:08<00:00, 54.52it/s, v_num=11, val_loss=1.41e+17]Validation loss: 1.4052377937261363e+17, Mse: 1.4052377937261363e+17
Epoch 2: 100%|██████████| 442/442 [00:08<00:00, 51.36it/s, v_num=11, val_loss=1.41e+17]Training loss: 2.4041618887252378e+17
Epoch 3:  73%|███████▎  | 324/4

/home/ggil/anaconda3/envs/hackathon/lib/python3.12/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
