

Import modules

In [None]:
# Import modules

import numpy as np
from time import time
from tqdm import tqdm
from os import makedirs
from abc import abstractmethod
from argparse import ArgumentParser

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.nn.functional import sigmoid
from torch.utils.data import DataLoader
from collections import deque

from pyphoon2.DigitalTyphoonDataset import DigitalTyphoonDataset

Define LSTM class

In [None]:
# Define LSTM : forward()

class LSTM(nn.Module):
    """Stacked LSTM followed by a small MLP head.

    The recurrent part consumes input shaped (batch, seq, feature)
    (``batch_first=True``). Only the output of the last time step is fed to
    the head, which maps it to ``output_size`` values.

    Configuration printed by the notebook run:
    input_size=69, hidden_size=1024, num_layers=3, output_size=8.

    Args:
        input_size: number of expected features per time step.
        hidden_size: number of features in the LSTM hidden state.
        num_layers: number of stacked recurrent layers.
        output_size: number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            # Inter-layer dropout only exists between stacked layers; passing
            # it for a single layer has no effect and only triggers a warning.
            dropout=0.1 if num_layers > 1 else 0.0,
        )
        # Head: the output of each module is the input of the next one.
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 512),
            nn.ReLU(),
            nn.Linear(512, output_size),
        )

    def forward(self, x):
        """Return predictions of shape (batch, output_size).

        When ``output_size`` is 1 the trailing dimension is dropped and the
        result has shape (batch,). The batch dimension is always kept, also
        for a batch of one sample.
        """
        # Zero initial hidden/cell state, created directly on the input's
        # device and dtype.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # `out` holds the top-layer hidden output for every time step.
        out, _ = self.lstm(x, (h0, c0))

        # Use only the last time step. squeeze(-1) instead of a bare squeeze()
        # so that a batch of size 1 does not lose its batch dimension.
        return self.fc(out[:, -1, :]).squeeze(-1)

Define a base trainer

In [None]:
# Define BaseTrainer: train(), _save_checkpoint(), load_checkpoint()
class BaseTrainer:
    """Common training loop with checkpointing.

    Subclasses must assign ``self.model`` *before* calling
    ``super().__init__`` (the optimizer is built from its parameters here)
    and must implement ``_run_train_epoch`` and ``_run_val_epoch``.

    Args:
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches.
        args: namespace providing ``device``, ``batch_size``, ``lr``,
            ``num_epochs``, ``num_workers``, ``log_interval``,
            ``save_interval``, ``save_dir``, ``experiment``, ``run_name``
            and ``checkpoint`` (path to resume from, or ``None``).
    """

    def __init__(self,
                 train_loader,
                 val_loader,
                 args) -> None:
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.device = args.device
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.num_epochs = args.num_epochs
        self.num_workers = args.num_workers

        self.log_interval = args.log_interval
        self.save_interval = args.save_interval
        self.save_dir = f"{args.save_dir}/{args.experiment}/{args.run_name}"
        makedirs(self.save_dir, exist_ok=True)
        self.opt = torch.optim.AdamW(self.model.parameters(), lr=self.lr)
        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.opt, T_max=self.num_epochs)
        if args.checkpoint is not None:
            self._load_checkpoint(args.checkpoint)
        else:
            self.model_params = list(self.model.parameters())
            self.name = args.run_name
            self.step = 0
            self.epoch = 0

        print(f"Model has {sum(p.numel() for p in self.model_params):,} parameters and trainer is ready")
        print(f"Weights will be saved at {self.save_interval} intervals")

    def train(self):
        """Run epochs ``self.epoch`` .. ``num_epochs - 1`` and save checkpoints.

        A checkpoint is written after every epoch whose index is a multiple
        of ``save_interval`` and once more when training finishes.
        """
        for _ in range(self.epoch, self.num_epochs):
            self._run_train_epoch()
            # Validation runs before the counter is advanced because the
            # subclasses use ``self.epoch`` for their progress-bar labels.
            self._run_val_epoch()

            finished_epoch = self.epoch
            # Advance first so that the checkpoint records the next epoch to
            # run; otherwise resuming would repeat the epoch just finished.
            self.epoch += 1
            if finished_epoch % self.save_interval == 0:
                self._save_checkpoint()

        self._save_checkpoint()

    @abstractmethod
    def _run_train_epoch(self):
        """Train for one epoch (implemented by subclasses)."""
        ...

    @abstractmethod
    def _run_val_epoch(self):
        """Evaluate for one epoch (implemented by subclasses)."""
        ...

    def _save_checkpoint(self, name=None):
        """Write model, optimizer, scheduler and counters to ``save_dir``.

        Args:
            name: optional file suffix; defaults to the current step count.
        """
        data = dict(
            model_dict=self.model.state_dict(),
            opt_dict=self.opt.state_dict(),
            sched_dict=self.lr_scheduler.state_dict(),
            epoch=self.epoch,
            step=self.step,
            name=self.name,
        )

        path = f"{self.save_dir}/checkpoint_{self.step if name is None else name}.pth"

        torch.save(data, path)
        print(f"Checkpoint saved in {path}")

    def _load_checkpoint(self, path):
        """Restore the state written by ``_save_checkpoint`` from ``path``."""
        # NOTE: torch.load unpickles the file; only load checkpoints from a
        # trusted source. map_location lets a checkpoint written on another
        # device (e.g. CUDA) be loaded on the current one.
        data = torch.load(path, map_location=self.device)
        self.model.load_state_dict(data["model_dict"])
        self.model_params = list(self.model.parameters())
        self.opt.load_state_dict(data["opt_dict"])
        # Older checkpoints carry no scheduler state; keep the fresh one then.
        if "sched_dict" in data:
            self.lr_scheduler.load_state_dict(data["sched_dict"])
        self.epoch = data["epoch"]
        self.step = data["step"]
        self.name = f"{data['name']}_resumed"

        print("="*100)
        print(f"Resuming training from checkpoint {path}")
        print("="*100)


Variables to be defined by user

In [4]:
# Variables to be defined by user

#### =========================================== Variables that must be set by the user

## Root folder of the dataset. It is later used as the dataset `prefix` and is
## expected to contain `image/`, `metadata/` and `metadata.json`.
_path_to_data='F:/Data folder for ML/AU/AU'

## Folder holding the preprocessed sequences (one `<sequence>.npz` file each).
#_preprocessed_path='C:/Users/nilss/Desktop/Advanded ML FOLDer/benchmarks/r34p_10k_w6'
_preprocessed_path='C:/Users/nilss/Desktop/Advanded ML FOLDer/outputs-Typhoon_prediction/r34p_10k_w6'

## Folder where the model checkpoints are written.
_save_dir='C:/Users/nilss/Desktop/Advanded ML FOLDer/models'

#### =========================================== 

## Path to a previous checkpoint to resume from. Set to None to start training
## from scratch.
#_checkpoint='C:/Users/nilss/Desktop/Advanded ML FOLDer/models/ts/lstm_10kp_3l_1024_3i_pressure/checkpoint_30000.pth'
_checkpoint='C:/Users/nilss/Desktop/Advanded ML FOLDer/outputs-Typhoon_prediction/models/ts/lstm_10kp_3l_1024_3i_pressure/checkpoint_3.pth'

Parameters and arguments in a parser object

In [None]:
# Define parameters and arguments

def _int_list(text):
    """Parse a comma separated string such as "0,1,2" into a list of ints."""
    return [int(token) for token in text.split(",")]


def _str_to_bool(text):
    """Parse a command-line boolean.

    ``type=bool`` would turn every non-empty string (including "false")
    into True, so the accepted spellings are listed explicitly. argparse
    reports the ValueError as a usage error.
    """
    lowered = text.strip().lower()
    if lowered in ("1", "true", "t", "yes", "y"):
        return True
    if lowered in ("0", "false", "f", "no", "n"):
        return False
    raise ValueError(f"not a boolean: {text!r}")


# Indices refer to the order of the labels requested from the dataset.
_labels_input="0,1,2,3,4"
_labels_output="3,4" # Solution to temporary problem
_out_dim=len(_int_list(_labels_output))

# ========== Config ==========
arg_parser = ArgumentParser()
arg_parser.add_argument("-c", "--config_file", type=str)
arg_parser.add_argument("--experiment", type=str, default="ts")
arg_parser.add_argument("--run_name", type=str, default=f"lstm_{int(time())}")


# ========== Dataset / Data =========
arg_parser.add_argument("--path_to_data", type=str, default=_path_to_data)
arg_parser.add_argument("--preprocessed_path", type=str, default=_preprocessed_path)
arg_parser.add_argument("--save_dir", type=str, default=_save_dir)
arg_parser.add_argument("--checkpoint", type=str, default=_checkpoint)
arg_parser.add_argument("--labels", type=str, default="pressure")
# Given on the command line as e.g. "0,1,2"; always stored as a list of ints.
arg_parser.add_argument("--labels_input", type=_int_list, default=_int_list(_labels_input))
arg_parser.add_argument("--labels_output", type=_int_list, default=_int_list(_labels_output))
arg_parser.add_argument("--interval", type=int, default=3)
arg_parser.add_argument("--use_date", type=_str_to_bool, default=False)
arg_parser.add_argument("--pred_diff", type=_str_to_bool, default=False)

# ========== Model ==========
arg_parser.add_argument("--ts_model", type=str, default="lstm")
arg_parser.add_argument("--backbone", type=str)
arg_parser.add_argument("--dim", type=int)
arg_parser.add_argument("--hidden_dim", type=int, default=1024)
arg_parser.add_argument("--out_dim", type=int,default=_out_dim)
arg_parser.add_argument("--num_layers", type=int, default=3)

# ========== Time Series Params ==========
arg_parser.add_argument("--num_inputs", type=int, default=3)
arg_parser.add_argument("--num_outputs", type=int, default=8)

# ========== Training ==========
arg_parser.add_argument("--lr", type=float, default=2e-4)
arg_parser.add_argument("--batch_size", type=int, default=128)
arg_parser.add_argument("--num_epochs", type=int, default=100000)
arg_parser.add_argument("--weight_decay", type=float, default=0)
arg_parser.add_argument("--es_patience", type=int, default=-1)
arg_parser.add_argument("--log_interval", type=int, default=10)
arg_parser.add_argument("--save_interval", type=int, default=1000)

arg_parser.add_argument("--seed", type=int, default=42)

# ========== MoCo ==========
arg_parser.add_argument("--queue_size", type=int, default=65536)
arg_parser.add_argument("--temperature", type=float, default=0.07)

# ========== Scheduler ==========
arg_parser.add_argument("--ws_range", type=str, default="")
arg_parser.add_argument("--ws_warmup", type=int)
arg_parser.add_argument("--ws_last", type=int)

# ========== System ==========
arg_parser.add_argument("--device", type=str, default= torch.device("cuda" if torch.cuda.is_available() else "cpu"))
arg_parser.add_argument("--num_workers", type=int, default=0) #Number of workers used while training. Default is set to 0 to prevent errors with incompatible systems

# The notebook has no command line, so an empty argument list is parsed and
# every option takes its default. The rest of the notebook reads the parsed
# namespace under the name `parser`, so that name is kept.
parser = arg_parser.parse_args([])


In [6]:
# Define LABEL_SIZE, NORMALIZATION
# Number of classes of every label that is one-hot encoded; labels that are
# not listed here are treated as single scalar values.
LABEL_SIZE = {
    "month": 12,
    "day": 31,
    "hour": 24,
    # "grade": 6,  (currently disabled)
}

# [mean, std] pairs used to standardise scalar labels: (value - mean) / std.
NORMALIZATION = {
    "pressure": [983.8, 22.5],
    "wind": [36.7, 32.7],
    "lat": [22.58, 10.6],
    "lng": [136.2, 17.3],
}

Define a time series trainer

In [None]:
#Define TimeSeriesTrainer(BaseTrainer): _run_train_epoch, _run_val_epoch
class TimeSeriesTrainer(BaseTrainer):
    """Trainer that fits the LSTM on label sequences with an MSE loss.

    Args:
        train_loader: loader whose ``dataset.dataset`` exposes
            ``get_input_size()`` and ``num_preds`` (presumably a split of
            the sequence dataset; confirm against the caller).
        val_loader: loader used for validation.
        args: namespace with the options read by ``BaseTrainer`` plus
            ``ts_model``, ``hidden_dim``, ``num_layers`` and ``labels``.

    Raises:
        ValueError: if ``args.ts_model`` is not ``"lstm"``.
    """

    def __init__(self, train_loader, val_loader, args) -> None:
        # Fail early: without a model the base class would crash later with
        # an unrelated AttributeError.
        if args.ts_model != "lstm":
            raise ValueError(
                f"Unsupported ts_model {args.ts_model!r}; only 'lstm' is implemented"
            )

        dataset = train_loader.dataset.dataset
        input_size = dataset.get_input_size()
        output_size = dataset.num_preds

        # The model has to exist before BaseTrainer.__init__ builds the
        # optimizer from its parameters.
        self.model = LSTM(
            input_size,
            hidden_size=args.hidden_dim,
            num_layers=args.num_layers,
            output_size=output_size,
        ).to(args.device)
        print("LSTM :")
        print(" - Input_size : ", input_size)
        print(" - Hidden_size : ", args.hidden_dim)
        print(" - num_layers : ", args.num_layers)
        print(" - Output_size : ", output_size)
        super().__init__(train_loader, val_loader, args)

        self.reg_criterion = nn.MSELoss()
        self.labels = args.labels

    def _batch_loss(self, batch):
        """Run the model on one ``(inputs, targets)`` batch and return the loss."""
        inp, outs = batch[0].to(self.device), batch[1].to(self.device)
        preds = self.model(inp)

        if "grade" in self.labels:
            # Binary target: whether the grade equals 6 (the meaning of that
            # grade value is not visible in this file).
            outs = outs == 6
            preds = torch.sigmoid(preds)

        # Only the first target variable is used for the loss.
        outs = outs[:, :, 0]
        return self.reg_criterion(preds, outs.float())

    def _run_train_epoch(self):
        """Train for one epoch and advance the learning-rate schedule."""
        self.model.train()
        pbar = tqdm(self.train_loader, desc=f"Training {self.epoch+1}/{self.num_epochs}")
        # Running window over the last `log_interval` batch losses.
        losses = dict(loss=deque(maxlen=self.log_interval))

        for batch in pbar:
            self.opt.zero_grad()
            loss = self._batch_loss(batch)
            losses["loss"].append(loss.item())

            loss.backward()
            self.opt.step()

            self.step += 1

            avg = {f"tr_{key}": np.mean(val) for key, val in losses.items()}
            pbar.set_postfix(dict(loss=loss.item(), **avg))

        self.lr_scheduler.step()

    def _run_val_epoch(self):
        """Evaluate on the validation loader and return the mean batch loss."""
        self.model.eval()
        pbar = tqdm(self.val_loader, desc=f"Eval {self.epoch+1}/{self.num_epochs}")
        losses = dict(loss=list())

        with torch.no_grad():
            for batch in pbar:
                loss = self._batch_loss(batch)
                losses["loss"].append(loss.item())

                avg = {f"val_{key}": np.mean(val) for key, val in losses.items()}
                pbar.set_postfix(dict(loss=loss.item(), **avg))

        return np.mean(losses["loss"])


Define a fisheye class used to transform images

In [8]:
# Define FishEye : fisheye_grid(), forward()
def fisheye_grid(width, height, alpha):
    """Build a radially rescaled sampling grid for ``F.grid_sample``.

    Every normalised coordinate in [-1, 1] is multiplied by ``r ** alpha``,
    where ``r`` is its distance from the centre, and the result is clamped
    back to [-1, 1].

    Args:
        width: number of grid points along the first grid axis.
        height: number of grid points along the second grid axis.
        alpha: exponent applied to the radius.

    Returns:
        Tensor of shape (width, height, 2).
        NOTE(review): the file only ever uses square grids; the axis order
        for non-square sizes has not been verified.
    """
    # Create a grid of normalized coordinates
    x, y = torch.meshgrid(torch.linspace(-1, 1, width), torch.linspace(-1, 1, height), indexing="ij")
    coords = torch.stack((y, x), dim=-1)

    # Apply fisheye transformation to the coordinates
    r = torch.sqrt(coords[:, :, 0]**2 + coords[:, :, 1]**2)
    radial_scale = torch.pow(r, alpha)
    # Leave the exact centre unscaled.
    radial_scale[r == 0] = 1.0
    fisheye_coords = coords * torch.unsqueeze(radial_scale, -1)

    # Clamp the transformed coordinates to [-1, 1] range
    fisheye_coords = torch.clamp(fisheye_coords, min=-1, max=1)

    return fisheye_coords


class FishEye(torch.nn.Module):
    """Image transform that resamples an image through a fisheye grid.

    Args:
        size: side length of the (square) output.
        alpha: exponent forwarded to ``fisheye_grid``.
    """

    def __init__(self, size, alpha):
        super().__init__()
        # Registered as a non-persistent buffer: it follows `.to(device)`
        # with the module but stays out of the state dict.
        self.register_buffer("grid", fisheye_grid(size, size, alpha), persistent=False)

    def forward(self, img):
        """Warp ``img``.

        Args:
            img: tensor of shape (C, H, W) or (B, C, H, W).

        Returns:
            Warped tensor with the same number of dimensions as the input.
        """
        unbatched = img.dim() == 3
        if unbatched:
            img = img.unsqueeze(0)

        # grid_sample needs the grid on the image's device and with the
        # image's floating point dtype.
        grid_dtype = img.dtype if img.is_floating_point() else self.grid.dtype
        grid = self.grid.to(device=img.device, dtype=grid_dtype)
        # expand() shares one grid across the batch without copying it.
        grid = grid.unsqueeze(0).expand(img.shape[0], -1, -1, -1)

        fish = F.grid_sample(img, grid, align_corners=True)
        # Drop only the batch dimension that was added above, so a genuine
        # batch of size 1 keeps its shape.
        return fish.squeeze(0) if unbatched else fish

<details>
<summary>📖Dataloader </summary>
<a name="dataloader"></a>
**Transform and filter functions**
- These functions zoom in on the more important parts of the images and filter out images that are not deemed fit.

**Data loading**
- `get_TS_dataloader(args)` is defined for later use.
- The data is split into training, validation and test sets, and a loader is created for each of them.



</details>

In [None]:
# Default image pipeline: convert to a tensor, then apply the fisheye warp.
transforms = T.Compose([
    T.ToTensor(),
    FishEye(256, 0.2),
])


def transform_func(obj, trans=transforms):
    """Normalise an image and turn its date labels into a ``datetime``.

    Args:
        obj: ``(img, labels)`` pair; ``img`` must support ``clip`` and
            ``astype`` (a NumPy array) and ``labels`` must unpack into
            ``(year, month, day, hour)``.
        trans: callable applied to the normalised float32 image.

    Returns:
        Tuple ``(transformed image, datetime label)``.
    """
    # `datetime` is not imported at file level, so it is imported here; the
    # original code raised NameError on the first call.
    from datetime import datetime

    # Pixel values are clipped to this range and rescaled to [0, 1].
    # NOTE(review): the unit of these bounds is not visible in this file.
    img_range = [150, 350]
    img, labels = obj
    img = img.clip(img_range[0], img_range[1])
    img = (img - img_range[0])/(img_range[1]-img_range[0])
    y, m, d, h = labels
    label = datetime(year=y, month=m, day=d, hour=h)
    return trans(img.astype(np.float32)), label

# The "grade" and "pressure" configurations used the very same filter, and any
# other label setting left `filter_func` undefined (NameError when the
# dataloader is built). It is therefore defined unconditionally.
def filter_func(x):
    """Return True for images to keep: those whose grade is below 8."""
    return x.grade() < 8
    
def get_TS_dataloader(args):
    """Build the sequence dataset and its train/validation/test loaders.

    Args:
        args: namespace providing ``preprocessed_path``, ``out_dim``,
            ``labels_input``, ``labels_output``, ``num_inputs``,
            ``num_outputs``, ``interval``, ``path_to_data``, ``pred_diff``,
            ``batch_size`` and ``num_workers``.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles its data.
    """
    dataset = STD(labels=["month", "day", "hour", "pressure", "wind"],
                preprocessed_path=args.preprocessed_path,
                latent_dim= args.out_dim,
                x=          args.labels_input,   # e.g. [0, 1, 2, 3, 4]
                y=          args.labels_output,  # e.g. [3, 4]
                num_inputs= args.num_inputs,
                num_preds=  args.num_outputs,
                interval=   args.interval,
                filter_func=filter_func,
                prefix =    args.path_to_data,
                pred_diff=  args.pred_diff,
                )

    # 70 % / 15 % / 15 % split on whole sequences.
    train, val, test = dataset.random_split([0.7, 0.15, 0.15], split_by="sequence")
    print("Number of sequences in data split.")
    # The original f-string had a stray ")" after len(val), a SyntaxError.
    print(f"Training-Validation-Test :\n{len(train)}-\n{len(val)}-\n{len(test)}")

    def make_loader(subset, shuffle):
        return DataLoader(subset,
                          batch_size=args.batch_size,
                          shuffle=shuffle,
                          num_workers=args.num_workers)

    train_loader = make_loader(train, shuffle=True)
    val_loader = make_loader(val, shuffle=False)
    test_loader = make_loader(test, shuffle=False)

    return train_loader, val_loader, test_loader


Define a dataset loader (includes preprocessed images)

In [None]:
#class STD(DigitalTyphoonDataset): filter_sequences(sequence), __getitem__(self, idx),get_input_size,get_output_size, _labels_from_label_strs(self, image, label_strs),_prepare_labels, get_sequence_images, get_sequence
class STD(DigitalTyphoonDataset):
    """Sequence dataset returning (input window, prediction window) tensors.

    Every item is built from one sequence: the labels of all its images are
    encoded (one-hot for the labels in ``LABEL_SIZE``, standardised for the
    labels in ``NORMALIZATION``), optionally concatenated with preprocessed
    image features, and a randomly positioned window is cut out of them.
    """

    # arguments to the following function is still hard coded
    def __init__(self,
                 labels,
                 x,
                 y,
                 num_inputs=6,
                 num_preds=20,
                 interval=1,
                 #output_all=False,
                 preprocessed_path=None,
                 latent_dim=None,
                 pred_diff=False,
                 prefix=r"F:\Data folder for ML\AU\AU",
                 spectrum="Infrared",
                 load_data_into_memory=False,
                 ignore_list=None,
                 filter_func=None,
                 transform_func=None,
                 transform=None,
                 verbose=False) -> None:
        """
        labels: labels to include in ["year", "month", "day", "hour", "grade", "lat", "lng", "pressure", "wind"]
        x: sequence data to use as inputs. should be array indices corresponding to the order in which labels are requested;
            -1 additionally selects the preprocessed image features
        y: sequence data to use as targets. should be array indices corresponding to the order in which labels are requested;
            -1 additionally selects the preprocessed image features
        num_inputs: length of sequence used as input to the model
        num_preds: length of predicted datapoints
        interval: step, in images, between two consecutive datapoints
        preprocessed_path: folder with one "<sequence>.npz" feature file per sequence
        latent_dim: number of feature columns selected by -1; required when preprocessed_path is given
        pred_diff: if True, targets are returned relative to the last input datapoint
        prefix: dataset root containing "image/", "metadata/" and "metadata.json"
        The remaining arguments are forwarded unchanged to DigitalTyphoonDataset.

        Raises ValueError if preprocessed_path is given without latent_dim.
        """
        super().__init__(f"{prefix}/image/",
                        f"{prefix}/metadata/",
                        f"{prefix}/metadata.json",
                         labels,
                         "sequence",
                         spectrum,
                         True,
                         load_data_into_memory,
                         ignore_list,
                         filter_func,
                         transform_func,
                         transform,
                         verbose)
        # Translate the requested label indices into column indices of the
        # encoded label tensor (one-hot labels span several columns).
        idx = 0
        self.x = []
        self.y = []

        for i, label in enumerate(labels):
            sz = LABEL_SIZE.get(label, 1)
            if i in x:
                self.x.extend(range(idx, idx+sz))
            if i in y:
                self.y.extend(range(idx, idx+sz))
            idx += sz

        if preprocessed_path is None:
            print("Error : The preprocessed_path was set None or not defined properly")
        else:
            if latent_dim is None:
                raise ValueError("latent_dim must be given when preprocessed_path is set")
            # The check has to look at the requested indices `x` / `y`: the
            # column lists built above only hold non-negative values, so
            # testing them for -1 could never succeed.
            if -1 in x:
                self.x.extend(range(idx, idx+latent_dim))
            if -1 in y:
                self.y.extend(range(idx, idx+latent_dim))

        self.preprocessed_path = preprocessed_path
        self.num_inputs = num_inputs
        self.num_preds = num_preds
        self.interval = interval
        #self.output_all = output_all
        self.pred_diff = pred_diff

        for seq in self.sequences:
            if self._is_unusable_sequence(seq):
                # Empty the sequence and keep the counters in sync.
                self.number_of_images -= seq.get_num_images()
                seq.images.clear()
                self.number_of_nonempty_sequences -= 1

        # NOTE(review): this correction is kept exactly as in the original
        # code; its justification is not visible in this file.
        self.number_of_nonempty_sequences += 1

    def _is_unusable_sequence(self, sequence):
        """Return True for sequences that must be dropped.

        A sequence is dropped when it is shorter than one full input +
        prediction window, or when its first image is from before 1987
        (threshold found by trial and error).
        """
        min_images = (self.num_inputs + self.num_preds)*self.interval + 1
        if sequence.get_num_images() < min_images:
            return True
        return sequence.images[0].year() < 1987

    # Defined as methods rather than lambdas stored on the instance so that
    # the dataset stays picklable (needed by DataLoader worker processes).
    def slice_inputs(self, start_idx):
        """Slice selecting the ``num_inputs`` input datapoints."""
        return slice(start_idx, start_idx+(self.num_inputs*self.interval), self.interval)

    def slice_outputs(self, start_idx):
        """Slice selecting the ``num_preds`` datapoints after the inputs."""
        return slice(start_idx+(self.num_inputs*self.interval),
                     start_idx+((self.num_inputs+self.num_preds)*self.interval),
                     self.interval)

    def get_input_size(self):
        """Number of columns in one input datapoint."""
        return len(self.x)

    def get_output_size(self):
        """Number of columns in one target datapoint."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(inputs, targets)`` for a random window of sequence ``idx``."""
        # Gather images from a sequence and label them in a torch stack
        seq = self.get_ith_sequence(idx)
        images = seq.get_all_images_in_sequence()
        labels = torch.stack([self._labels_from_label_strs(image, self.labels) for image in images])

        if self.preprocessed_path is not None:
            # The npz file stores the feature vectors under 'arr_0' and the
            # matching image names under 'arr_1'. The context manager closes
            # the file handle once the arrays have been read.
            with np.load(f"{self.preprocessed_path}/{seq.sequence_str}.npz") as npz:
                names_to_features = dict(zip(npz["arr_1"], npz["arr_0"]))
            # Key: file name without directory and extension.
            features = [names_to_features[str(img.image_filepath).split("/")[-1].split(".")[0]]
                        for img in images]
            features = torch.from_numpy(np.array(features))

            labels = torch.cat((labels, features), dim=1)

        #if self.output_all:
        #    return labels, seq.sequence_str

        # Random window start; the filtering in __init__ guarantees that the
        # upper bound is at least 1.
        start_idx = np.random.randint(0, seq.get_num_images()-(self.num_inputs + self.num_preds)*self.interval)
        lab_inputs = labels[self.slice_inputs(start_idx), self.x]
        lab_preds = labels[self.slice_outputs(start_idx), self.y]

        if self.pred_diff:
            # Express the targets relative to the last input datapoint.
            lab_preds = lab_preds - labels[self.slice_inputs(start_idx), self.y][-1]

        return lab_inputs, lab_preds

    def _labels_from_label_strs(self, image, label_strs):
        """
        Given an image and the label/labels to retrieve from the image, returns the
        encoded label tensor

        :param image: image to access labels for
        :param label_strs: either a List/tuple of label strings or a single label string
        :return: the encoded labels, concatenated into one tensor when several are requested
        """
        if isinstance(label_strs, (list, tuple)):
            return torch.cat([self._prepare_labels(image.value_from_string(label), label)
                              for label in label_strs])
        return self._prepare_labels(image.value_from_string(label_strs), label_strs)

    def _prepare_labels(self, value, label):
        """Encode one label value as a 1-D tensor.

        Labels listed in ``LABEL_SIZE`` are one-hot encoded, labels listed in
        ``NORMALIZATION`` are standardised, everything else is returned as is.
        """
        if label in LABEL_SIZE:
            one_hot = torch.zeros(LABEL_SIZE[label])
            if label == "hour":
                # Used directly as the index.
                one_hot[value] = 1
            elif label == "grade":
                # Index is shifted by two.
                one_hot[value-2] = 1
            else:
                # Index is shifted by one (month, day).
                one_hot[value-1] = 1
            return one_hot

        # Normalize
        if label in NORMALIZATION:
            mean, std = NORMALIZATION[label]
            return (torch.Tensor([value]) - mean) / std

        if label == "grade":
            return torch.Tensor([float(value)])

        return torch.Tensor([value])

    def get_sequence_images(self, seq_str):
        """Return the centre-cropped (224x224 by default) images of a sequence."""
        def crop(img, cropx=224, cropy=224):
            y,x = img.shape
            startx = x//2-(cropx//2)
            starty = y//2-(cropy//2)
            return img[starty:starty+cropy,startx:startx+cropx]
        idx = self._sequence_str_to_seq_idx[seq_str]
        seq = self.sequences[idx]
        images = seq.get_all_images_in_sequence()
        return [crop(image.image()) for image in images]

    def get_sequence(self, seq_str):
        """Return the dataset item of the sequence named ``seq_str``."""
        idx = self._sequence_str_to_seq_idx[seq_str]
        return self.__getitem__(idx)


Load dataset and train model

In [11]:
# Load sequences and train model
# Build the loaders from the parsed options; the test loader is not needed here.
train_loader, val_loader, _ = get_TS_dataloader(parser)
# Create the trainer (resumes from parser.checkpoint when it is set) and train.
TimeSeriesTrainer(train_loader,val_loader,parser).train()


269 train sequences
58 val sequences
57 test sequences
LSTM
Input_size :  69
Hidden_size :  1024
num_layers :  3
Output_size :  8
Resuming training from checkpoint C:/Users/nilss/Desktop/Advanded ML FOLDer/outputs-Typhoon_prediction/models/ts/lstm_10kp_3l_1024_3i_pressure/checkpoint_3.pth
Model has 21,807,624 parameters and trainer is ready
Weights will be saved at 1000 intervals


Training 1/100000: 100%|██████████| 3/3 [00:09<00:00,  3.20s/it, loss=0.781, tr_loss=10.7]


Checkpoint saved in C:/Users/nilss/Desktop/Advanded ML FOLDer/models/ts/lstm_1749224750/checkpoint_6.pth


Eval 1/100000: 100%|██████████| 1/1 [00:02<00:00,  2.07s/it, loss=0.827, val_loss=0.827]
Training 2/100000: 100%|██████████| 3/3 [00:06<00:00,  2.26s/it, loss=147, tr_loss=54.3]   
Eval 2/100000: 100%|██████████| 1/1 [00:01<00:00,  1.38s/it, loss=0.513, val_loss=0.513]
Training 3/100000: 100%|██████████| 3/3 [00:06<00:00,  2.32s/it, loss=0.531, tr_loss=10.4]
Eval 3/100000: 100%|██████████| 1/1 [00:01<00:00,  1.33s/it, loss=0.406, val_loss=0.406]
Training 4/100000: 100%|██████████| 3/3 [00:07<00:00,  2.36s/it, loss=0.462, tr_loss=10.2]
Eval 4/100000: 100%|██████████| 1/1 [00:01<00:00,  1.51s/it, loss=0.495, val_loss=0.495]
Training 5/100000: 100%|██████████| 3/3 [00:06<00:00,  2.31s/it, loss=0.622, tr_loss=9.83]
Eval 5/100000: 100%|██████████| 1/1 [00:01<00:00,  1.42s/it, loss=0.43, val_loss=0.43]
Training 6/100000: 100%|██████████| 3/3 [00:06<00:00,  2.25s/it, loss=0.756, tr_loss=9.16]
Eval 6/100000: 100%|██████████| 1/1 [00:01<00:00,  1.51s/it, loss=0.307, val_loss=0.307]
Training 7/1

KeyboardInterrupt: 