<a href="https://colab.research.google.com/github/saratutuianu/Tornado-detection-using-radar-images/blob/main/tornado_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -r ./drive/MyDrive/proiect_ml/requirements/torch.txt



In [None]:
import sys

sys.path.append('/content/drive/MyDrive/proiect_ml')

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib, linecache

import torch
from torch import optim, nn
from torch.utils.data import Dataset
from torchvision import transforms, utils

import tornet.data.preprocess as preprocess

# Re-import tornet.data.preprocess from disk so that edits made to the module
# file are picked up by this notebook session; the cached source lines are
# refreshed first so tracebacks show the current file content.
linecache.checkcache(preprocess.__file__)      # invalidate the line cache
preprocess = importlib.reload(preprocess)

from tornet.data.loader import read_file, TornadoDataLoader
from tornet.data.preprocess import add_coordinates, permute_dims, remove_tilt_dim, get_shape
from tornet.data.constants import ALL_VARIABLES

# Root folder of the dataset; catalog.csv and all sample files live below it.
data_root = r'./drive/MyDrive/proiect_ml/tornet_dataset'

# Only samples whose start_time falls in this calendar year are kept.
year = 2019

catalog_path = os.path.join(data_root, 'catalog.csv')

catalog = pd.read_csv(catalog_path, parse_dates=['start_time', 'end_time'])

# J = day of year of the sample start, r = J mod 20 (so r is in 0..19).
# r is the key used below to carve a validation subset out of the 'train' rows.
catalog['J'] = catalog['start_time'].dt.dayofyear
catalog['r'] = catalog['J'] % 20
# Filter on the `year` variable (previously the literal 2019 was repeated here,
# so changing `year` had no effect).
catalog = catalog[catalog.start_time.dt.year == year]

# Test split: every row tagged 'test' in the catalog.
catalog_test = catalog[catalog['type'] == 'test']
catalog_test = catalog_test.sample(frac=1, random_state=1234)  # shuffles list

# From here on `catalog` holds only the rows tagged 'train'.
catalog = catalog[catalog['type'] == 'train']

# Training split: 'train' rows with r in 0..13.
catalog_train = catalog[catalog['r'] <= 13]
catalog_train = catalog_train.sample(frac=1, random_state=1234)  # shuffles list

# Validation split: 'train' rows with r in 14..16. A 'train' row with r >= 17
# (if any exists) ends up in neither split.
catalog_validation = catalog[(catalog['r'] > 13) & (catalog['r'] < 17)]
catalog_validation = catalog_validation.sample(frac=1, random_state=1234)  # shuffles list

# Class balance of the training split; used later to weight the positive class.
confirmed_number = len(catalog_train[catalog_train['category'] == 'TOR'])
total_number = len(catalog_train)

class TornadoDataset(TornadoDataLoader,Dataset):
    """TornadoDataLoader that is also a ``torch.utils.data.Dataset``.

    Adds no behaviour of its own; the extra base class lets instances be
    handed to ``torch.utils.data.DataLoader``.
    """

def _drop_tilt(sample):
    """Forward ``sample`` to ``remove_tilt_dim`` (the only preprocessing step)."""
    return remove_tilt_dim(sample)


# Per-sample transform applied by every dataset below.
transform = transforms.Compose([_drop_tilt])


def _files_for(split_catalog):
    """Return the data_root-prefixed path of every file listed in ``split_catalog``."""
    return [os.path.join(data_root, name) for name in split_catalog.filename]


file_list_train = _files_for(catalog_train)
file_list_validation = _files_for(catalog_validation)
file_list_test = _files_for(catalog_test)


def _make_dataset(files):
    """Build a TornadoDataset over ``files`` with the settings shared by all splits."""
    return TornadoDataset(
        files,
        variables=ALL_VARIABLES,
        n_frames=4,
        tilt_last=False,  # so ordering of dims is [time,tilt,az,range]
        transform=transform,
    )


torch_ds_train = _make_dataset(file_list_train)
torch_ds_validation = _make_dataset(file_list_validation)
torch_ds_test = _make_dataset(file_list_test)


batch_size = 16


def _make_loader(dataset):
    """Wrap ``dataset`` in a DataLoader using the global batch size and 8 workers."""
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=8,
    )


torch_dl_train = _make_loader(torch_ds_train)

torch_dl_validation = _make_loader(torch_ds_validation)

torch_dl_test = _make_loader(torch_ds_test)

In [None]:
from tornet.models.torch.cnn_baseline import NormalizeVariable
from tornet.data.constants import CHANNEL_MIN_MAX

def conv3d_bn_block(in_channels, out_channels, kernel_size=3, stride=1, padding='same'):
    """Build a ``Conv3d -> BatchNorm3d -> ReLU`` stack.

    Args:
        in_channels: Number of channels of the input volume.
        out_channels: Number of channels produced by the convolution (and
            normalised by the batch-norm layer).
        kernel_size: Convolution kernel size, forwarded to ``nn.Conv3d``.
        stride: Convolution stride, forwarded to ``nn.Conv3d``.
        padding: Convolution padding, forwarded to ``nn.Conv3d``.

    Returns:
        An ``nn.Sequential`` holding the three layers at indices 0, 1 and 2.
    """
    # The convolution carries no bias term; BatchNorm3d has its own learnable shift.
    convolution = nn.Conv3d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=False,
    )
    layers = [
        convolution,
        nn.BatchNorm3d(out_channels),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)

def conv3d_transpose_bn_block(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    """Wrap a single bias-free ``ConvTranspose3d`` in an ``nn.Sequential``.

    Despite the ``bn`` in the name, no batch-norm or activation layer is added;
    the returned container holds only the transposed convolution (index 0).

    Args:
        in_channels: Number of channels of the input volume.
        out_channels: Number of channels produced by the layer.
        kernel_size: Kernel size, forwarded to ``nn.ConvTranspose3d``.
        stride: Stride, forwarded to ``nn.ConvTranspose3d``.
        padding: Padding, forwarded to ``nn.ConvTranspose3d``.

    Returns:
        An ``nn.Sequential`` with exactly one layer.
    """
    transposed = nn.ConvTranspose3d(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        bias=False,
    )
    return nn.Sequential(transposed)

class TornadoLikelihood(nn.Module):
    """3-D convolutional encoder/decoder that outputs a one-channel volume.

    The encoder is four stages of two ``conv3d_bn_block`` layers followed by a
    max-pool; the decoder is four stages of two ``conv3d_bn_block`` layers
    followed by a trilinear upsample. There are no skip connections between
    the two halves. A final ``conv3d_transpose_bn_block`` reduces the result
    to a single channel.

    The attribute names and the order in which the sub-modules are created
    are deliberately fixed: ``state_dict`` keys and the parameter order seen
    by an optimizer depend on them.
    """

    def __init__(self,radar_variables=ALL_VARIABLES):
        """Create all layers.

        Args:
            radar_variables: Names of the input variables; one input channel
                is created per name.
        """
        super().__init__()
        self.radar_variables = radar_variables
        n_inputs = len(radar_variables)

        def upsample(scale):
            # Every decoder stage uses the same interpolation settings.
            return nn.Upsample(scale_factor=scale, mode='trilinear', align_corners=True)

        # ---- Encoder ----
        self.conv_layer_encoder1 = conv3d_bn_block(n_inputs, 16)
        self.conv_layer_encoder2 = conv3d_bn_block(16, 16)
        # Kernel (1,2,2): the first of the three pooled dimensions is left untouched.
        self.max_pool_encoder1 = nn.MaxPool3d(kernel_size=(1,2,2), stride=(1,2,2))

        self.conv_layer_encoder3 = conv3d_bn_block(16, 32)
        self.conv_layer_encoder4 = conv3d_bn_block(32, 32)
        self.max_pool_encoder2 = nn.MaxPool3d(kernel_size=(1,2,2), stride=(1,2,2))

        self.conv_layer_encoder5 = conv3d_bn_block(32, 64)
        self.conv_layer_encoder6 = conv3d_bn_block(64, 64)
        # From here on all three dimensions are halved.
        self.max_pool_encoder3 = nn.MaxPool3d(kernel_size=2, stride=2)

        self.conv_layer_encoder7 = conv3d_bn_block(64, 128)
        self.conv_layer_encoder8 = conv3d_bn_block(128, 128)
        self.max_pool_encoder4 = nn.MaxPool3d(kernel_size=2, stride=2)

        # ---- Decoder ----
        self.conv_layer_decoder1 = conv3d_bn_block(128, 128)
        self.conv_layer_decoder2 = conv3d_bn_block(128, 128)
        self.upsample_decoder1 = upsample(2)

        self.conv_layer_decoder3 = conv3d_bn_block(128, 64)
        self.conv_layer_decoder4 = conv3d_bn_block(64, 64)
        self.upsample_decoder2 = upsample(2)

        self.conv_layer_decoder5 = conv3d_bn_block(64, 32)
        self.conv_layer_decoder6 = conv3d_bn_block(32, 32)
        self.upsample_decoder3 = upsample((1,2,2))

        self.conv_layer_decoder7 = conv3d_bn_block(32, 16)
        self.conv_layer_decoder8 = conv3d_bn_block(16, 16)
        self.upsample_decoder4 = upsample((1,2,2))

        # Single-channel output head.
        self.conv_layer_final = conv3d_transpose_bn_block(16, 1)

    def _normalize_inputs(self,data):
        """Rescale each radar variable with its ``CHANNEL_MIN_MAX`` entry.

        For every name in ``self.radar_variables`` the value is mapped with
        ``(value - min) / (max - min)``, so the configured min/max range lands
        on [0, 1].

        Args:
            data: Mapping from variable name to its values.

        Returns:
            A new dict with one rescaled entry per radar variable.
        """
        def rescale(name):
            bounds = np.array(CHANNEL_MIN_MAX[name])  # [2,]
            lower = bounds[0]
            inverse_span = 1/(bounds[1]-lower)
            return (data[name] - lower) * inverse_span

        return {name: rescale(name) for name in self.radar_variables}

    def forward(self,x):
        """Run the network on a batch of radar variables.

        Args:
            x: Mapping that contains one tensor per name in
                ``self.radar_variables``. The tensors are stacked on a new
                dimension 1 and fed to 3-D convolutions, so each one is
                presumably [batch, time, az, rng] - TODO confirm against the
                data loader.

        Returns:
            The output of the final layer; one channel, remaining dimensions
            as produced by the pooling/upsampling chain.
        """
        # Keep only the radar inputs, then normalise them.
        per_variable = {name: x[name] for name in self.radar_variables}
        per_variable = self._normalize_inputs(per_variable)
        # Stack the variables on a new channel dimension (dim 1).
        x = torch.stack([per_variable[name] for name in self.radar_variables], dim=1)

        # Replace NaN entries with the constant -3.
        x = torch.where(torch.isnan(x),-3,x)

        # Layers in execution order: encoder stages, decoder stages, output head.
        pipeline = (
            self.conv_layer_encoder1, self.conv_layer_encoder2, self.max_pool_encoder1,
            self.conv_layer_encoder3, self.conv_layer_encoder4, self.max_pool_encoder2,
            self.conv_layer_encoder5, self.conv_layer_encoder6, self.max_pool_encoder3,
            self.conv_layer_encoder7, self.conv_layer_encoder8, self.max_pool_encoder4,
            self.conv_layer_decoder1, self.conv_layer_decoder2, self.upsample_decoder1,
            self.conv_layer_decoder3, self.conv_layer_decoder4, self.upsample_decoder2,
            self.conv_layer_decoder5, self.conv_layer_decoder6, self.upsample_decoder3,
            self.conv_layer_decoder7, self.conv_layer_decoder8, self.upsample_decoder4,
            self.conv_layer_final,
        )
        for layer in pipeline:
            x = layer(x)

        return x


In [None]:
import torchmetrics
from torchmetrics.functional.classification import binary_auroc, binary_average_precision

def accuracy(logits, y, threshold=0.24):
    """Return the percentage of samples classified correctly.

    A sample counts as correct when its score is ``>= threshold`` and its
    label is 1, or when its score is ``< threshold`` and its label is 0.

    The scores are compared with ``threshold`` exactly as given; no sigmoid
    is applied here.
    NOTE(review): the training cell of this notebook passes raw model
    outputs - confirm whether 0.24 was meant as a probability threshold.

    Args:
        logits: Per-sample scores (tensor or sequence); flattened to 1-D.
        y: Per-sample labels (0 or 1), same number of elements as ``logits``.
        threshold: Decision threshold applied to ``logits``.

    Returns:
        Accuracy in percent as a Python float; 0.0 for empty input.

    Raises:
        ValueError: If ``logits`` and ``y`` hold a different number of elements.
    """
    scores = torch.as_tensor(logits).reshape(-1)
    labels = torch.as_tensor(y).reshape(-1)
    if scores.shape != labels.shape:
        raise ValueError(
            f"logits and y must have the same number of elements, "
            f"got {scores.numel()} and {labels.numel()}"
        )
    if labels.numel() == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    true_positives = (scores >= threshold) & (labels == 1)
    true_negatives = (scores < threshold) & (labels == 0)
    correct = (true_positives | true_negatives).sum().item()

    return correct / float(labels.numel()) * 100.0

def recall(logits, y, threshold=0.24):
    """Return recall (true positives / actual positives) in percent.

    Among the samples whose label is 1, those with a score ``>= threshold``
    are true positives and those with a score ``< threshold`` are false
    negatives. The scores are compared with ``threshold`` as given; no
    sigmoid is applied here.

    Args:
        logits: Per-sample scores (tensor or sequence); flattened to 1-D.
        y: Per-sample labels (0 or 1), same number of elements as ``logits``.
        threshold: Decision threshold applied to ``logits``.

    Returns:
        Recall in percent as a Python float; 0.0 when there is no positive
        label to recall.

    Raises:
        ValueError: If ``logits`` and ``y`` hold a different number of elements.
    """
    scores = torch.as_tensor(logits).reshape(-1)
    labels = torch.as_tensor(y).reshape(-1)
    if scores.shape != labels.shape:
        raise ValueError(
            f"logits and y must have the same number of elements, "
            f"got {scores.numel()} and {labels.numel()}"
        )

    positives = labels == 1
    true_positives = (positives & (scores >= threshold)).sum().item()
    false_negatives = (positives & (scores < threshold)).sum().item()

    relevant = true_positives + false_negatives
    return 0.0 if relevant == 0 else true_positives / relevant * 100.0

def precision(logits, y, threshold=0.24):
    """Return precision (true positives / predicted positives) in percent.

    Among the samples whose score is ``>= threshold``, those with label 1 are
    true positives and those with label 0 are false positives. The scores are
    compared with ``threshold`` as given; no sigmoid is applied here.

    Args:
        logits: Per-sample scores (tensor or sequence); flattened to 1-D.
        y: Per-sample labels (0 or 1), same number of elements as ``logits``.
        threshold: Decision threshold applied to ``logits``.

    Returns:
        Precision in percent as a Python float; 0.0 when no sample is
        predicted positive.

    Raises:
        ValueError: If ``logits`` and ``y`` hold a different number of elements.
    """
    scores = torch.as_tensor(logits).reshape(-1)
    labels = torch.as_tensor(y).reshape(-1)
    if scores.shape != labels.shape:
        raise ValueError(
            f"logits and y must have the same number of elements, "
            f"got {scores.numel()} and {labels.numel()}"
        )

    predicted_positive = scores >= threshold
    true_positives = (predicted_positive & (labels == 1)).sum().item()
    false_positives = (predicted_positive & (labels == 0)).sum().item()

    selected = true_positives + false_positives
    return 0.0 if selected == 0 else true_positives / selected * 100.0

def f1_score(logits, y):
    """Return the harmonic mean of ``precision`` and ``recall``.

    Both inputs are forwarded unchanged to ``precision`` and ``recall``; the
    result is on the same scale as their return values.

    Args:
        logits: Per-sample scores.
        y: Per-sample labels.

    Returns:
        ``2 * p * r / (p + r)``, or 0.0 when precision and recall sum to zero.
    """
    prec = precision(logits, y)
    rec = recall(logits, y)
    total = prec + rec
    if total == 0:
        return 0.0
    return 2 * (prec * rec) / total

def apply_metrics(logits, y):
    """Print accuracy, recall, precision, F1, AUROC and AUPRC for a set of scores.

    Args:
        logits: 1-D float tensor of per-sample scores.
        y: 1-D tensor of per-sample labels (0 or 1); may be float or integer.
    """
    print(f"Accuracy: {accuracy(logits, y):.4f}")
    print(f"Recall: {recall(logits, y):.4f}")
    print(f"Precision: {precision(logits, y):.4f}")
    print(f"F1 Score: {f1_score(logits, y):.4f}")

    # torchmetrics validates that the target tensor is not floating point, so
    # labels produced with ``.float()`` for the loss must be cast back to integers.
    targets = y.long()
    print(f"AUROC:  {binary_auroc(logits, targets).item():.4f}")
    print(f"AUPRC:  {binary_average_precision(logits, targets).item():.4f}")



In [None]:
def RMSprop(index, gamma, dw, db, vdw, vdb, learning_rate):
    """Apply one hand-written RMSprop step to ``model.layers[index]``.

    The running averages of the squared gradients are updated with decay
    ``gamma`` and then used to scale the step applied in place to the layer's
    ``weight`` and ``bias``.

    NOTE(review): this relies on a global ``model`` exposing ``.layers``; the
    ``TornadoLikelihood`` class in this notebook defines no such attribute and
    the training cell uses ``optim.RMSprop`` instead, so this helper appears
    to be unused here.

    Args:
        index: Position of the layer inside ``model.layers``.
        gamma: Decay factor of the running averages.
        dw: Gradient with respect to the layer weight.
        db: Gradient with respect to the layer bias.
        vdw: Running average of ``dw ** 2`` from the previous step.
        vdb: Running average of ``db ** 2`` from the previous step.
        learning_rate: Step size.

    Returns:
        tuple: ``(vdw, vdb)``, the updated running averages. They must be fed
        back into the next call; previously they were computed but discarded,
        so the caller had no way to carry the state forward.
    """
    vdw = gamma * vdw + (1 - gamma) * dw ** 2
    vdb = gamma * vdb + (1 - gamma) * db ** 2

    # 1e-08 keeps the denominator away from zero.
    model.layers[index].weight -= (learning_rate / (np.sqrt(vdw + 1e-08))) * dw
    model.layers[index].bias -= (learning_rate / (np.sqrt(vdb + 1e-08))) * db

    return vdw, vdb

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

from typing import Dict, List, Tuple, Any
import numpy as np

from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F

num_epochs = 15
checkpoint_dir = "/content/drive/MyDrive/proiect_ml/checkpoints"


def _last_frame_labels(batch):
    """Pop 'label' from ``batch`` and return one float label per sample, shape [batch].

    The last entry of each sample's label is used (presumably the label of
    the most recent frame - confirm against the data loader).
    """
    labels = batch.pop('label')
    # Flatten per sample instead of calling torch.squeeze(): squeeze without a
    # `dim` also removes the batch dimension when a batch holds one sample,
    # after which `y[:, -1]` fails - the likely source of the
    # "IndexError: too many indices for tensor of dimension 1" in the recorded output.
    return labels.reshape(labels.shape[0], -1)[:, -1].float()


def _sample_logits(net, batch):
    """Run ``net`` on ``batch`` and reduce its output to one logit per sample, shape [batch]."""
    volume = net(batch)  # [batch,1,T,L,W]
    pooled = F.max_pool3d(volume, kernel_size=volume.size()[2:])  # [batch,1,1,1,1]
    # reshape keeps the batch dimension even for a single-sample batch.
    return pooled.reshape(pooled.shape[0])


def _save_checkpoint(path, epoch, **metrics):
    """Write model/optimizer/scheduler state plus ``metrics`` to ``path``.

    ``epoch`` is the index of the last fully completed epoch; resuming starts
    at ``epoch + 1``.
    """
    torch.save({
        "epoch": epoch,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "scheduler_state": scheduler.state_dict(),
        **metrics,
    }, path)


def _evaluate(net, criterion, loader):
    """Return (average loss, concatenated logits, concatenated labels) over ``loader``."""
    net.eval()
    running_loss = 0.0
    scores = []
    targets = []
    with torch.no_grad():
        for batch in loader:
            y = _last_frame_labels(batch)
            logits = _sample_logits(net, batch)
            running_loss += criterion(logits, y).item()
            scores.append(logits)
            targets.append(y)
    return running_loss / len(loader), torch.cat(scores, dim=0), torch.cat(targets, dim=0)


model = TornadoLikelihood()
if confirmed_number == 0:
    raise ValueError("catalog_train contains no 'TOR' samples; cannot compute pos_weight")
# Weight positives by the negative/positive ratio of the training split.
pos_weight = torch.tensor([(total_number - confirmed_number)/confirmed_number])
# TODO: consider label smoothing - a tornado label is never 100% certain (author's reminder).
loss_f = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = optim.RMSprop(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

# TensorBoard setup
writer = SummaryWriter('/content/drive/MyDrive/proiect_ml/summary')

# Resume from a checkpoint when it exists, otherwise start from scratch.
ckpt_path = "/content/drive/MyDrive/proiect_ml/checkpoints/epoch_002.pth"
start_epoch = 0
best_val = float("inf")
if os.path.isfile(ckpt_path):
    ckpt = torch.load(ckpt_path, map_location="cpu")

    model.load_state_dict(ckpt["model_state"])
    optimizer.load_state_dict(ckpt["optimizer_state"])
    scheduler.load_state_dict(ckpt["scheduler_state"])

    start_epoch = int(ckpt["epoch"]) + 1
    # Older epoch checkpoints stored the *training* loss under "val_loss", so
    # that key cannot be trusted as the best validation loss; only "best_val" is.
    best_val = float(ckpt.get("best_val", float("inf")))

    print(f"Reluare din {ckpt_path} | start_epoch={start_epoch} | best_val={best_val:.4f}")
else:
    print(f"No checkpoint found at {ckpt_path}; training from scratch")

# Training loop
total_step = len(torch_dl_train)
for epoch in range(start_epoch, num_epochs):
    epoch_ckpt_path = f"{checkpoint_dir}/epoch_{epoch+1:03d}.pth"

    model.train()
    train_loss = 0.0
    for i, batch in enumerate(torch_dl_train):
        y = _last_frame_labels(batch)

        # Forward pass
        logits = _sample_logits(model, batch)
        loss = loss_f(logits, y)

        # Backward and optimize
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        if (i+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}')

        if (i + 1) % 500 == 0:
            # Mid-epoch safety checkpoint. It is stamped with the previous
            # epoch index so that a resume re-runs this unfinished epoch
            # instead of skipping the rest of it.
            _save_checkpoint(
                epoch_ckpt_path,
                epoch - 1,
                step=i + 1,
                train_loss=train_loss / (i + 1),
                best_val=best_val,
            )

    # Calculate average training loss for the epoch
    avg_train_loss = train_loss / len(torch_dl_train)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Training Loss: {avg_train_loss:.4f}')
    writer.add_scalar('training loss', avg_train_loss, epoch)

    # Save right after training so the epoch survives a failure during validation.
    _save_checkpoint(epoch_ckpt_path, epoch, train_loss=avg_train_loss, best_val=best_val)

    # Validation
    avg_val_loss, all_probs, all_labels = _evaluate(model, loss_f, torch_dl_validation)
    writer.add_scalar('validation loss', avg_val_loss, epoch)
    # torchmetrics rejects floating-point targets, hence the integer cast.
    apply_metrics(all_probs, all_labels.long())

    if avg_val_loss < best_val:
        best_val = avg_val_loss
        best_path = "/content/drive/MyDrive/proiect_ml/checkpoints/best.pth"
        _save_checkpoint(best_path, epoch, val_loss=avg_val_loss)
        print(f"[Epoch {epoch+1}] Best model salvat la {best_path} (val_loss={best_val:.4f})")

    # Learning rate scheduling
    scheduler.step(avg_val_loss)

    # Refresh the epoch checkpoint so it also carries the stepped scheduler
    # state, the real validation loss and the current best validation loss.
    _save_checkpoint(
        epoch_ckpt_path,
        epoch,
        train_loss=avg_train_loss,
        val_loss=avg_val_loss,
        best_val=best_val,
    )

# Final test
_, all_probs, all_labels = _evaluate(model, loss_f, torch_dl_test)
apply_metrics(all_probs, all_labels.long())

writer.close()

# Save the model
# torch.save(model.state_dict(), 'tornado_cnn.pth')


Reluare din /content/drive/MyDrive/proiect_ml/checkpoints/epoch_002.pth | start_epoch=2 | best_val=1.2577


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c47945f0900>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()Exception ignored in: 
<function _MultiProcessingDataLoaderIter.__del__ at 0x7c47945f0900>Exception ignored in: 
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x7c47945f0900>  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    
if w.is_alive():Traceback (most recent call last):
    
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
self._shutdown_workers() 
   File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
     self

Epoch [3/15], Step [10/1030], Loss: 1.7924
Epoch [3/15], Step [20/1030], Loss: 1.0609
Epoch [3/15], Step [30/1030], Loss: 0.6949
Epoch [3/15], Step [40/1030], Loss: 2.1582
Epoch [3/15], Step [50/1030], Loss: 2.1582
Epoch [3/15], Step [60/1030], Loss: 1.0607
Epoch [3/15], Step [70/1030], Loss: 1.0608
Epoch [3/15], Step [80/1030], Loss: 1.4268
Epoch [3/15], Step [90/1030], Loss: 1.4272
Epoch [3/15], Step [100/1030], Loss: 1.4273
Epoch [3/15], Step [110/1030], Loss: 1.4277
Epoch [3/15], Step [120/1030], Loss: 1.4271
Epoch [3/15], Step [130/1030], Loss: 1.4268
Epoch [3/15], Step [140/1030], Loss: 1.0602
Epoch [3/15], Step [150/1030], Loss: 1.0230
Epoch [3/15], Step [160/1030], Loss: 0.4553
Epoch [3/15], Step [170/1030], Loss: 1.1225
Epoch [3/15], Step [180/1030], Loss: 1.5032
Epoch [3/15], Step [190/1030], Loss: 1.4370
Epoch [3/15], Step [200/1030], Loss: 1.8410
Epoch [3/15], Step [210/1030], Loss: 1.4495
Epoch [3/15], Step [220/1030], Loss: 0.6442
Epoch [3/15], Step [230/1030], Loss: 1.45

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c47945f0900>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c47945f0900>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

IndexError: too many indices for tensor of dimension 1