In [None]:
import os
import requests
import tarfile

# Constants
# When True, download_files() is invoked before extraction (see the
# `if DOWNLOAD_DATA:` guard at the bottom of this cell).
DOWNLOAD_DATA = True
DATA_DIR = './data'  # Directory containing .tar.gz files
# Archives found in DATA_DIR are unpacked into this sub-directory; a later cell
# builds its train/test paths from it.
EXTRACT_DIR = os.path.join(DATA_DIR, 'extracted')

# Bucket and endpoint configuration
# Host plus path prefix (no scheme); download_files() builds each URL as
# https://{CUSTOM_ENDPOINT}/{APP}/{file_name}.
CUSTOM_ENDPOINT = "bbproxy.meyerstk.com/file"
APP = "TorNetBecauseZenodoSlow"  # Bucket name

# Ensure directories exist
# makedirs creates intermediate directories too, so DATA_DIR is created as a
# side effect of creating EXTRACT_DIR.
os.makedirs(EXTRACT_DIR, exist_ok=True)

def download_files():
    """
    Download files from a public Backblaze B2 bucket served via a custom endpoint.

    Each archive in the hard-coded list is fetched from
    ``https://{CUSTOM_ENDPOINT}/{APP}/{file_name}`` into ``DATA_DIR`` unless a
    file with that name is already present there.

    The body is streamed to a temporary ``<name>.part`` file and only renamed
    to its final name once the transfer has finished, so an interrupted
    download can never be mistaken for a complete archive on the next run.

    Returns:
        None. Progress and HTTP failures are reported with ``print``.

    Raises:
        requests.RequestException: on connection errors or timeouts.
        OSError: if the local file cannot be written.
    """
    print("Starting download process...")

    # Fetch the list of files from the bucket
    # Replace this with a pre-generated file list if necessary
    file_list = [
        # Add your files here if listing is not available
        "tornet_2013.tar.gz",
        "tornet_2014.tar.gz",
        "tornet_2015.tar.gz",
        "tornet_2016.tar.gz",
        "tornet_2017.tar.gz",
        "tornet_2018.tar.gz",
        "tornet_2019.tar.gz",
        "tornet_2020.tar.gz",
        "tornet_2021.tar.gz",
        "tornet_2022.tar.gz",
    ]

    # Do not rely on another statement having created the target directory.
    os.makedirs(DATA_DIR, exist_ok=True)

    for file_name in file_list:
        # Construct the public URL for each file
        url = f"https://{CUSTOM_ENDPOINT}/{APP}/{file_name}"
        local_file_path = os.path.join(DATA_DIR, file_name)

        # Check if the file already exists locally
        if os.path.exists(local_file_path):
            print(f"File {file_name} already exists in {DATA_DIR}")
            continue

        print(f"Downloading {file_name} from {url}...")
        partial_path = local_file_path + ".part"
        try:
            # (connect, read) timeouts: without them a stalled connection
            # would block the notebook indefinitely. The context manager
            # releases the connection even if writing fails.
            with requests.get(url, stream=True, timeout=(10, 60)) as response:
                if response.status_code != 200:
                    print(f"Failed to download {file_name}. Status code: {response.status_code}")
                    continue
                with open(partial_path, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        file.write(chunk)
            # Publish the archive under its final name only once it is complete.
            os.replace(partial_path, local_file_path)
        finally:
            # Still present only if the transfer was interrupted.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print(f"Downloaded {file_name} to {local_file_path}")

def extract_local_tar_files(data_dir=None, extract_dir=None):
    """
    Extract all .tar.gz files from a local directory into an output directory.

    Args:
        data_dir: Directory scanned (non-recursively) for ``*.tar.gz`` files.
            Defaults to the module-level ``DATA_DIR``.
        extract_dir: Directory the archives are unpacked into; created if
            missing. Defaults to the module-level ``EXTRACT_DIR``.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        tarfile.TarError: if an archive is unreadable or contains a member
            that would be written outside ``extract_dir``.
        OSError: if a directory or file cannot be read or written.
    """
    # Resolve defaults at call time so the notebook-level constants are used
    # when the function is called without arguments.
    data_dir = DATA_DIR if data_dir is None else data_dir
    extract_dir = EXTRACT_DIR if extract_dir is None else extract_dir

    print("Starting extraction process...")
    os.makedirs(extract_dir, exist_ok=True)

    # Sorted so the extraction order does not depend on the filesystem.
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.tar.gz'):
            continue
        file_path = os.path.join(data_dir, file_name)
        print(f'Extracting {file_path}...')
        with tarfile.open(file_path, 'r:gz') as tar:
            # The archives come from the network, so refuse members that
            # would escape the destination (absolute paths, "..", etc.).
            if hasattr(tarfile, 'data_filter'):
                tar.extractall(path=extract_dir, filter='data')
            else:
                # Older Python without extraction filters: check every member
                # path before anything is written.
                dest_root = os.path.realpath(extract_dir)
                for member in tar.getmembers():
                    target = os.path.realpath(os.path.join(dest_root, member.name))
                    if os.path.commonpath([dest_root, target]) != dest_root:
                        raise tarfile.TarError(
                            f"Refusing to extract {member.name!r} from {file_path}: "
                            f"path escapes {extract_dir}"
                        )
                tar.extractall(path=extract_dir)
        print(f'Extracted {file_path} to {extract_dir}')

# Downloading is optional: set DOWNLOAD_DATA to False to reuse archives that
# are already present in DATA_DIR.
if DOWNLOAD_DATA:
    download_files()

# Call the function to process the local .tar.gz files
# This runs on every execution of the cell, regardless of DOWNLOAD_DATA.
extract_local_tar_files()

In [None]:
import os
import xarray as xr
import numpy as np

# Names of the dataset variables read from each NetCDF file by load_nc_file().
# NOTE(review): presumably radar products (reflectivity, velocity, ...) — confirm
# against the dataset documentation.
VARIABLES = ['DBZ', 'VEL', 'KDP', 'RHOHV', 'ZDR', 'WIDTH']
# Number of indices read along the `time` dimension (0 .. TIME_STEPS - 1).
TIME_STEPS = 3
# Number of indices read along the `sweep` dimension (0 .. SWEEPS - 1).
SWEEPS = 2
# Size of the first two axes of every per-file array built by load_nc_file().
IMAGE_HEIGHT = 120
IMAGE_WIDTH = 240

def load_nc_file(file_path):
    """
    Read one NetCDF sample and return ``(data, label)``.

    ``data`` is a float32 array of shape
    ``(IMAGE_HEIGHT, IMAGE_WIDTH, len(VARIABLES) * TIME_STEPS * SWEEPS)``.
    Channels are ordered time-major, then by sweep, then by variable in the
    order given by ``VARIABLES``. NaN and +/-inf values are replaced with 0.

    ``label`` is 1 when the file's base name contains ``'TOR'``, otherwise 0.
    """
    with xr.open_dataset(file_path, engine="netcdf4") as ds:
        n_channels = len(VARIABLES) * TIME_STEPS * SWEEPS
        data = np.empty((IMAGE_HEIGHT, IMAGE_WIDTH, n_channels), dtype=np.float32)

        # Flatten the three nested loops into a single stream of selections so
        # the running channel index can come from enumerate().
        selections = (
            (time_idx, sweep_idx, name)
            for time_idx in range(TIME_STEPS)
            for sweep_idx in range(SWEEPS)
            for name in VARIABLES
        )
        for channel, (time_idx, sweep_idx, name) in enumerate(selections):
            plane = ds[name].isel(time=time_idx, sweep=sweep_idx).values
            # copy=False sanitises `plane` in place and returns that same array.
            data[:, :, channel] = np.nan_to_num(plane, copy=False, nan=0, posinf=0, neginf=0)

        is_tornado = 'TOR' in os.path.basename(file_path)
        label = 1 if is_tornado else 0
    return data, label

def create_dataset(directory, loader=None):
    """
    Load all NetCDF files from a directory tree and return arrays of data and labels.

    Files are visited in a deterministic order (directories and file names
    sorted at every level), so repeated runs yield identically ordered arrays
    regardless of the underlying filesystem.

    Args:
        directory: Root directory searched recursively for ``*.nc`` files.
        loader: Callable mapping a file path to ``(array, label)``. Defaults
            to ``load_nc_file``.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays with one entry per file. Both
        arrays are empty when the directory contains no ``.nc`` files.

    Raises:
        FileNotFoundError: if ``directory`` does not exist or is not a directory.
    """
    # os.walk silently yields nothing for a missing path, which would surface
    # much later as an obscure shape error; fail early with a clear message.
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"Dataset directory not found: {directory}")
    if loader is None:
        loader = load_nc_file

    data, labels = [], []
    for root, dirs, files in os.walk(directory):
        # Sorting `dirs` in place controls the order in which os.walk descends.
        dirs.sort()
        for file in sorted(files):
            if file.endswith('.nc'):
                img, label = loader(os.path.join(root, file))
                data.append(img)
                labels.append(label)
    return np.array(data), np.array(labels)

# Dataset paths
# EXTRACT_DIR is defined in the download/extraction cell, so that cell must
# have been executed before this one.
TRAIN_DIR = os.path.join(EXTRACT_DIR, "train")
TEST_DIR = os.path.join(EXTRACT_DIR, "test")

# Load datasets
# Each call walks the directory tree and loads every .nc file into memory.
X_train, y_train = create_dataset(TRAIN_DIR)
X_test, y_test = create_dataset(TEST_DIR)

# Print dataset shapes
print(f"Train data shape: {X_train.shape}, Train labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}, Test labels shape: {y_test.shape}")

In [None]:
from tensorflow.keras import models, layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.regularizers import l2

# Model Definition
def create_torcnn(input_shape=(120, 240, 36), dropout_rate=0.3):
    """
    Build and compile the CNN used for tornado detection.

    Architecture: four convolutional stages (32, 64, 128 and 256 filters; the
    first three stages have two 3x3 convolutions, the last has one), each
    followed by batch normalisation, 2x2 max pooling and dropout. Then a
    flattened dense head (128 and 64 units, L2-regularised, each with batch
    normalisation and dropout of 0.4) and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D layer.
        dropout_rate: Dropout rate applied after every convolutional stage.

    Returns:
        The compiled Keras ``Sequential`` model (Adam with learning rate
        0.0005, binary cross-entropy loss; accuracy, precision, recall and
        AUC metrics).
    """
    def conv(filters, **extra):
        # 3x3 same-padded ReLU convolution shared by every stage.
        return layers.Conv2D(filters, (3, 3), activation='relu', padding='same', **extra)

    def stage_tail():
        # Normalise, downsample and regularise at the end of a conv stage.
        return [
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(dropout_rate),
        ]

    def dense_stage(units):
        # L2-regularised dense layer with its own normalisation and dropout.
        return [
            layers.Dense(units, activation='relu', kernel_regularizer=l2(0.01)),
            layers.BatchNormalization(),
            layers.Dropout(0.4),
        ]

    # Layers are instantiated strictly in network order.
    stack = [
        # Block 1
        conv(32, input_shape=input_shape),
        conv(32),
        *stage_tail(),
        # Block 2
        conv(64),
        conv(64),
        *stage_tail(),
        # Block 3
        conv(128),
        conv(128),
        *stage_tail(),
        # Block 4
        conv(256),
        *stage_tail(),
        # Fully Connected Layers
        layers.Flatten(),
        *dense_stage(128),
        *dense_stage(64),
        # Output Layer
        layers.Dense(1, activation='sigmoid'),
    ]
    model = models.Sequential(stack)

    # Compile the model
    optimizer = Adam(learning_rate=0.0005)
    tracked_metrics = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(
        optimizer=optimizer,
        loss=BinaryCrossentropy(),
        metrics=tracked_metrics,
    )
    return model

# Create Model
# Build the input shape from the same constants load_nc_file() uses to size
# its output, so the model and the data cannot drift apart if the image size
# or channel configuration is changed.
input_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, len(VARIABLES) * TIME_STEPS * SWEEPS)
model = create_torcnn(input_shape=input_shape)

In [None]:
# Training hyperparameters passed to model.fit below.
BATCH_SIZE = 32
EPOCHS = 10

print("Starting model training...")
# NOTE(review): the test split is used as validation data here and is also the
# split passed to model.evaluate in the next cell, so the reported test metrics
# are not from data unseen during training monitoring — consider a separate
# validation split.
history = model.fit(
    X_train, y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_test, y_test)
)

In [None]:
print("Evaluating the model...")
# `results` is a sequence; the print below treats index 0 as the loss and
# index 1 as accuracy. The model was compiled with further metrics (precision,
# recall, AUC) — presumably they follow at later indices; confirm via
# model.metrics_names.
results = model.evaluate(X_test, y_test)
print(f"Test Loss: {results[0]}, Test Accuracy: {results[1]}")