In [None]:
import tensorflow as tf

In [None]:
# Define the path to your TFRecord file
tfrecord_path = "/data/ai_club/fire/next_day_fire_spread_data/next_day_wildfire_spread_train_00.tfrecord"

# Load the TFRecord dataset
raw_dataset = tf.data.TFRecordDataset(tfrecord_path)

In [None]:
# Define a function to parse TFRecord examples
def parse_example(example_proto):
    example = tf.train.Example()
    example.ParseFromString(example_proto.numpy())  # Deserialize
    return example

# # Read and print a single parsed record
# for raw_record in raw_dataset.take(1):
#     parsed_example = parse_example(raw_record)
#     print(parsed_example)

In [None]:
import tensorflow as tf

# List all TFRecord files (if known)
tfrecord_files = [
    "/data/ai_club/fire/next_day_fire_spread_data/next_day_wildfire_spread_train_00.tfrecord",
    "/data/ai_club/fire/next_day_fire_spread_data/next_day_wildfire_spread_train_01.tfrecord"
]

# Extract feature keys from multiple files
for file in tfrecord_files:
    print(f"\nChecking file: {file}")
    raw_dataset = tf.data.TFRecordDataset(file)
    
    for raw_record in raw_dataset.take(1):  # Inspect only the first record
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        print("Feature Keys:", list(example.features.feature.keys()))

In [None]:
# Extract and print feature keys from the first record
for raw_record in raw_dataset.take(1):  # Take only the first record
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())  # Deserialize
    feature_keys = list(example.features.feature.keys())  # Get all feature names
    print("Feature Keys:", feature_keys)

# Wildfire Spread Prediction Feature Descriptions

| Feature Key  | Description |
|-------------|-------------|
| `vs`       | **Wind Speed** (m/s) – Measures how fast the wind is moving, affecting fire spread. |
| `th`       | **Theta (Potential Temperature)** – Represents the temperature an air parcel would have if moved adiabatically to a standard pressure level. |
| `population` | **Population Density** – Indicates how many people live in a given area, affecting fire risk and response strategies. |
| `tmmx`     | **Maximum Daily Temperature** (°C) – The highest temperature recorded during the day. |
| `PrevFireMask` | **Previous Fire Presence** (Binary/Mask) – Indicates if there was fire in the area the day before. |
| `elevation` | **Elevation** (meters) – The height above sea level, affecting weather conditions and fire behavior. |
| `sph`      | **Specific Humidity** (kg/kg) – The amount of water vapor per unit of air mass, influencing fuel moisture. |
| `pr`       | **Precipitation** (mm) – The amount of rainfall, which can suppress fire spread. |
| `pdsi`     | **Palmer Drought Severity Index (PDSI)** – A measure of drought conditions, with lower values indicating more severe drought. |
| `erc`      | **Energy Release Component (ERC)** – A fire weather index estimating the potential available energy in live and dead fuels. |
| `FireMask` | **Current Fire Presence** (Binary/Mask) – Indicates if there is an active fire in the area. |
| `NDVI`     | **Normalized Difference Vegetation Index (NDVI)** – A measure of vegetation health, where higher values indicate lush greenery and lower values indicate dry or dead vegetation. |
| `tmmn`     | **Minimum Daily Temperature** (°C) – The lowest temperature recorded during the day. |

---

### Why These Features Matter for Fire Spread Prediction
- **Temperature (`tmmx`, `tmmn`)**: Higher temperatures dry out vegetation, making it more flammable.
- **Humidity (`sph`)**: Lower humidity means drier conditions, increasing fire risk.
- **Wind Speed (`vs`)**: Stronger winds accelerate fire spread and can change fire direction unpredictably.
- **Precipitation (`pr`)**: Rainfall can suppress fire ignition and spread.
- **Drought Index (`pdsi`)**: Long-term dryness can make regions more susceptible to wildfires.
- **Energy Release Component (`erc`)**: Higher ERC values indicate more available fuel energy, leading to more intense fires.
- **Vegetation Health (`NDVI`)**: Green vegetation retains moisture, while dry vegetation is more flammable.
- **Elevation (`elevation`)**: Fires behave differently at higher altitudes due to changes in oxygen levels and terrain.
- **Fire Mask Features (`FireMask`, `PrevFireMask`)**: Indicate whether there were past or present wildfires in a given area, helping models track fire movement.

In [3]:
import tensorflow as tf

# Define dataset file paths
tfrecord_dir = "/data/ai_club/fire/next_day_fire_spread_data/"
tfrecord_files = [
    tfrecord_dir + fname for fname in [
        "next_day_wildfire_spread_train_00.tfrecord", "next_day_wildfire_spread_train_01.tfrecord",
        "next_day_wildfire_spread_train_02.tfrecord", "next_day_wildfire_spread_train_03.tfrecord",
        "next_day_wildfire_spread_train_04.tfrecord", "next_day_wildfire_spread_train_05.tfrecord",
        "next_day_wildfire_spread_train_06.tfrecord", "next_day_wildfire_spread_train_07.tfrecord",
        "next_day_wildfire_spread_train_08.tfrecord", "next_day_wildfire_spread_train_09.tfrecord",
        "next_day_wildfire_spread_train_10.tfrecord", "next_day_wildfire_spread_train_11.tfrecord",
        "next_day_wildfire_spread_train_12.tfrecord", "next_day_wildfire_spread_train_13.tfrecord",
        "next_day_wildfire_spread_train_14.tfrecord"
    ]
]

# Load first record to check feature structure
def get_feature_names(tfrecord_files):
    raw_dataset = tf.data.TFRecordDataset(tfrecord_files)
    for raw_record in raw_dataset.take(1):  # Take first record
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        return list(example.features.feature.keys())  # Extract feature names

feature_names = get_feature_names(tfrecord_files)
print("Feature Names:", feature_names)

Feature Names: ['vs', 'th', 'population', 'tmmx', 'PrevFireMask', 'elevation', 'sph', 'pr', 'pdsi', 'erc', 'FireMask', 'NDVI', 'tmmn']


In [4]:
def get_feature_schema(tfrecord_file):
    raw_dataset = tf.data.TFRecordDataset(tfrecord_file)
    for raw_record in raw_dataset.take(1):  # Inspect first record
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        
        schema = {}
        for key, feature in example.features.feature.items():
            dtype = feature.WhichOneof("kind")
            if dtype == "float_list":
                schema[key] = tf.io.FixedLenFeature([len(feature.float_list.value)], tf.float32)
            elif dtype == "int64_list":
                schema[key] = tf.io.FixedLenFeature([len(feature.int64_list.value)], tf.int64)
            elif dtype == "bytes_list":
                schema[key] = tf.io.FixedLenFeature([], tf.string)
        return schema

feature_schema = get_feature_schema(tfrecord_files[0])
print("Feature Schema:", feature_schema)

Feature Schema: {'vs': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'th': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'population': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'tmmx': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'PrevFireMask': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'elevation': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'sph': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'pr': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'pdsi': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'erc': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'FireMask': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'NDVI': FixedLenFeature(shape=[4096], dtype=tf.float32, default_value=None), 'tmmn': FixedLenFeature(shape=[4096], dtype=

In [5]:
def parse_tfrecord_fn(example_proto):
    return tf.io.parse_single_example(example_proto, feature_schema)

def load_dataset(filenames, batch_size=32):
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(parse_tfrecord_fn)
    dataset = dataset.shuffle(10000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

# Load training, evaluation, and test sets
train_dataset = load_dataset(tfrecord_files)

In [10]:
import numpy as np

# Identify spatial features for CNN
cnn_features = ["NDVI", "elevation", "FireMask"]

# Identify sequential features for LSTM
lstm_features = ["tmmx", "tmmn", "sph", "pr", "pdsi", "erc"]

# Identify other features for dense layers
other_features = ["vs", "th", "population"]

def extract_features(example):
    # Reshape CNN input (assuming 64x64 grid)
    cnn_input = tf.reshape(tf.stack([example[key] for key in cnn_features], axis=-1), (64, 64, 3))  
   
    # LSTM input (unchanged)
    lstm_input = tf.stack([example[key] for key in lstm_features], axis=-1)  # Shape: (batch, sequence_length, features)
    
    # Dense input (unchanged)
    dense_input = tf.stack([example[key] for key in other_features], axis=-1)  # Shape: (batch, num_features)
    
    # Target label
    label = example["FireMask"]

    return (cnn_input, lstm_input, dense_input), label

# Apply to dataset
train_dataset = train_dataset.map(extract_features)

TypeError: in user code:


    TypeError: outer_factory.<locals>.inner_factory.<locals>.tf__extract_features() takes 1 positional argument but 2 were given


In [11]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Concatenate
from tensorflow.keras.models import Model

# CNN Branch
cnn_input = Input(shape=(64, 64, 3), name="cnn_input")  # Assuming 64x64 spatial features
x = Conv2D(32, (3, 3), activation="relu")(cnn_input)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation="relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)

# LSTM Branch
lstm_input = Input(shape=(10, len(lstm_features)), name="lstm_input")  # Assuming sequence length of 10
y = LSTM(64, return_sequences=True)(lstm_input)
y = LSTM(32)(y)

# Dense Branch
dense_input = Input(shape=(len(other_features),), name="dense_input")
z = Dense(32, activation="relu")(dense_input)

# Concatenate all branches
merged = Concatenate()([x, y, z])
output = Dense(1, activation="sigmoid")(merged)  # Predict fire spread probability

# Build Model
model = Model(inputs=[cnn_input, lstm_input, dense_input], outputs=output)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 cnn_input (InputLayer)      [(None, 64, 64, 3)]          0         []                            
                                                                                                  
 conv2d_2 (Conv2D)           (None, 62, 62, 32)           896       ['cnn_input[0][0]']           
                                                                                                  
 max_pooling2d_2 (MaxPoolin  (None, 31, 31, 32)           0         ['conv2d_2[0][0]']            
 g2D)                                                                                             
                                                                                                  
 conv2d_3 (Conv2D)           (None, 29, 29, 64)           18496     ['max_pooling2d_2[0][0]'

In [12]:
# Train the model
model.fit(train_dataset, epochs=20, batch_size=32)

Epoch 1/20


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "model_1" is incompatible with the layer: expected shape=(None, 64, 64, 3), found shape=(None, 4096, 3)
