In [None]:
#import all the necessary packages
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import imageio.v2 as imageio
from tqdm.auto import tqdm
from concurrent.futures import ProcessPoolExecutor
from sklearn.model_selection import train_test_split
import random
import os
import cv2
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import matplotlib.gridspec as gridspec
import matplotlib.gridspec as gridspec

In [None]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve.
# This cell depends on the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Root folder of the challenge dataset on the mounted Drive.
input_path = '/content/drive/My Drive/AMLProject/Data/2024-flame-ai-challenge/dataset'
# train.csv holds one row per simulation (id, u, alpha, grid sizes, data filenames).
metadata = pd.read_csv(os.path.join(input_path,'train.csv'))
# Preview the first rows as the cell output.
metadata.head()

Unnamed: 0,id,u,alpha,Nt,Nx,Ny,theta_filename,ustar_filename,xi_filename
0,804025,2,10,150,113,32,theta_K_id804025.dat,ustar_ms-1_id804025.dat,xi_id804025.dat
1,875935,2,5,150,113,32,theta_K_id875935.dat,ustar_ms-1_id875935.dat,xi_id875935.dat
2,930086,6,0,150,113,32,theta_K_id930086.dat,ustar_ms-1_id930086.dat,xi_id930086.dat
3,661713,4,5,150,113,32,theta_K_id661713.dat,ustar_ms-1_id661713.dat,xi_id661713.dat
4,633229,2,0,150,113,32,theta_K_id633229.dat,ustar_ms-1_id633229.dat,xi_id633229.dat


In [None]:
#This part of the code was taken from the starter notebook given along with the challenge
def load_data(file_id, data_dir,Nt, Nx, Ny):
    """Read the ustar, theta and xi fields of one simulation from raw float32 files.

    Args:
        file_id: Prefix of the data files; the files read are
            ``{file_id}_ustar.dat``, ``{file_id}_theta.dat`` and ``{file_id}_xi.dat``.
        data_dir: Directory that contains the three ``.dat`` files.
        Nt, Nx, Ny: Target shape; every flat array is reshaped to ``(Nt, Nx, Ny)``.

    Returns:
        Tuple ``(ustar, theta, xi)`` of float32 arrays, each of shape ``(Nt, Nx, Ny)``.

    Raises:
        ValueError: From ``reshape`` when a file's element count is not ``Nt * Nx * Ny``.
    """
    filenames = (
        f"{file_id}_ustar.dat",
        f"{file_id}_theta.dat",
        f"{file_id}_xi.dat",
    )

    # Read every file before reshaping any of them, so I/O errors surface first.
    flat_fields = [
        np.fromfile(os.path.join(data_dir, filename), dtype=np.float32)
        for filename in filenames
    ]

    target_shape = (Nt, Nx, Ny)
    ustar, theta, xi = [flat.reshape(target_shape) for flat in flat_fields]
    return ustar, theta, xi

In [None]:
import numpy as np
import cv2
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, mean_squared_error
# The following code block was written with assistance from Perplexity.
# It defines a class that produces optical-flow predictions and evaluates them:
class WildfirePredictor:
    """Optical-flow baseline that extrapolates a sequence of 2-D frames.

    The predictor has no trainable parameters: each new frame is obtained by
    estimating dense Farneback optical flow between the two most recent frames
    and warping the latest frame one step further along that flow.

    Attributes:
        data_dir: Directory containing the raw ``.dat`` files.
        metadata: Metadata table handed in by the caller (returned unchanged by
            ``load_metadata``).
        Nx, Ny: Frame height and width used when reshaping raw files.
        input_steps: Number of frames in each input window.
        output_steps: Number of frames predicted / compared per window.
    """

    def __init__(self, data_dir, metadata):
        self.data_dir = data_dir
        self.metadata = metadata
        self.Nx = 113
        self.Ny = 32
        self.input_steps = 5
        self.output_steps = 20

    def load_metadata(self):
        """Return the metadata object that was passed to the constructor."""
        return self.metadata

    def load_data(self, filename):
        """Read a raw float32 file from ``data_dir`` as an array of frames.

        Args:
            filename: File name relative to ``self.data_dir``.

        Returns:
            float32 array of shape ``(T, Nx, Ny)`` where ``T`` is inferred from
            the file size.

        Raises:
            ValueError: From ``reshape`` when the element count is not a
                multiple of ``Nx * Ny``.
        """
        file_path = os.path.join(self.data_dir, filename)
        data = np.fromfile(file_path, dtype=np.float32)
        return data.reshape(-1, self.Nx, self.Ny)

    def create_sliding_windows(self, data):
        """Split a frame sequence into overlapping (input, target) windows.

        Window ``i`` uses frames ``[i, i + input_steps)`` as input and the next
        ``output_steps`` frames as target. Consecutive windows are shifted by
        one frame.

        Args:
            data: Sequence of frames indexed along its first axis.

        Returns:
            List of ``(input_window, output_window)`` tuples; empty when the
            sequence is shorter than ``input_steps + output_steps``.
        """
        span = self.input_steps + self.output_steps
        return [
            (data[start:start + self.input_steps], data[start + self.input_steps:start + span])
            for start in range(len(data) - span + 1)
        ]

    def _extrapolation_maps(self, flow):
        """Build the ``cv2.remap`` sampling maps that advance a frame along ``flow``.

        Farneback flow satisfies ``prev(y, x) ~ curr(y + flow_y, x + flow_x)``.
        ``cv2.remap`` computes ``dst(y, x) = src(map_y, map_x)``, so moving
        ``curr`` one more step forward requires sampling it at ``(y - flow_y,
        x - flow_x)``. Sampling at ``+flow`` would reconstruct ``prev`` instead.

        Args:
            flow: Array of shape ``(H, W, 2)`` with x-displacement in channel 0
                and y-displacement in channel 1.

        Returns:
            Tuple ``(map_x, map_y)`` of float32 arrays of shape ``(H, W)``.
        """
        rows, cols = flow.shape[:2]
        grid_y, grid_x = np.mgrid[0:rows, 0:cols]
        map_x = (grid_x - flow[..., 0]).astype(np.float32)
        map_y = (grid_y - flow[..., 1]).astype(np.float32)
        return map_x, map_y

    def optical_flow_prediction(self, prev_frame, curr_frame):
        """Predict the frame that follows ``curr_frame``.

        Args:
            prev_frame: Frame at time ``t - 1``; assumed to hold values in
                ``[0, 1]`` (TODO confirm against the data).
            curr_frame: Frame at time ``t``, same shape and range.

        Returns:
            float32 array with the same shape as ``curr_frame`` containing only
            0.0 and 1.0.
        """
        # Farneback needs 8-bit single-channel images, so rescale to 0..255.
        prev_u8 = (prev_frame * 255).astype(np.uint8)
        curr_u8 = (curr_frame * 255).astype(np.uint8)

        # Positional arguments: pyr_scale, levels, winsize, iterations,
        # poly_n, poly_sigma, flags.
        flow = cv2.calcOpticalFlowFarneback(prev_u8, curr_u8, None,
                                            0.5, 3, 15, 3, 5, 1.2, 0)

        # Warp the current frame one step further along the estimated motion.
        map_x, map_y = self._extrapolation_maps(flow)
        next_frame = cv2.remap(curr_u8, map_x, map_y, cv2.INTER_LINEAR)

        # Threshold the interpolated 8-bit image back to a 0/1 mask.
        return (next_frame > 127).astype(np.float32)

    def predict_sequence(self, input_sequence):
        """Roll the one-step predictor forward for ``output_steps`` frames.

        Args:
            input_sequence: Array of at least two frames; the last two seed the
                first prediction. The array itself is not modified.

        Returns:
            Array of ``output_steps`` predicted frames stacked on axis 0.
        """
        predicted_sequence = []
        current_sequence = input_sequence.copy()

        for _ in range(self.output_steps):
            next_frame = self.optical_flow_prediction(current_sequence[-2],
                                                      current_sequence[-1])
            predicted_sequence.append(next_frame)
            # Shift the history left by one and append the new prediction, so
            # later steps are driven by earlier predictions.
            current_sequence = np.roll(current_sequence, -1, axis=0)
            current_sequence[-1] = next_frame

        return np.array(predicted_sequence)

    def evaluate_predictions(self, y_true, y_pred):
        """Compute pixel-wise metrics between a target and a predicted sequence.

        Both arrays are flattened; values above 0.5 count as the positive class.

        Args:
            y_true: Ground-truth frames.
            y_pred: Predicted frames with the same number of elements.

        Returns:
            Dict with keys ``'accuracy'``, ``'f1'``, ``'roc_auc'`` and ``'mse'``.

        Raises:
            ValueError: From ``roc_auc_score`` when the thresholded ground truth
                contains a single class.
        """
        y_true_flat = y_true.flatten()
        y_pred_flat = y_pred.flatten()
        true_mask = y_true_flat > 0.5
        pred_mask = y_pred_flat > 0.5
        return {
            'accuracy': accuracy_score(true_mask, pred_mask),
            'f1': f1_score(true_mask, pred_mask),
            'roc_auc': roc_auc_score(true_mask, y_pred_flat),
            'mse': mean_squared_error(y_true_flat, y_pred_flat),
        }

    def visualize_full_sequence(self, test_windows, save_dir='./'):
        """Write two GIFs for the first window in ``test_windows``.

        ``initial_conditions.gif`` shows the input frames, and
        ``prediction_comparison.gif`` shows ground truth next to the prediction
        for every output step.

        Args:
            test_windows: Non-empty sequence of ``(input, target)`` windows; only
                the first one is rendered.
            save_dir: Directory that receives both GIF files. It is created when
                it does not exist.
        """
        import matplotlib.animation as animation

        input_seq, true_output = test_windows[0]
        pred_output = self.predict_sequence(input_seq)

        # Derive frame counts from the data instead of hard-coding 5 and 20.
        n_input = len(input_seq)
        n_output = min(len(true_output), len(pred_output))

        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        # --- GIF 1: the input frames -------------------------------------
        fig_init = plt.figure(figsize=(8, 10))

        def clear_init():
            fig_init.clf()
            return []

        def animate_init(t):
            fig_init.clf()
            ax = fig_init.add_subplot(1, 1, 1)
            image = ax.imshow(input_seq[t], vmin=0, vmax=1, cmap='hot')
            fig_init.colorbar(image, ax=ax)
            ax.set_title(f'Initial Condition (t={t})')
            ax.axis('off')
            return []

        # Every frame is redrawn from scratch, so blitting has nothing to reuse.
        anim_init = animation.FuncAnimation(fig_init, animate_init, init_func=clear_init,
                                            frames=n_input, interval=1000, blit=False)
        anim_init.save(os.path.join(save_dir, 'initial_conditions.gif'), writer='pillow')
        plt.close(fig_init)

        # --- GIF 2: ground truth vs prediction ----------------------------
        fig_pred = plt.figure(figsize=(15, 8))

        def clear_pred():
            fig_pred.clf()
            return []

        def animate_pred(t):
            fig_pred.clf()
            # Output frame t follows the n_input input frames in absolute time.
            step = t + n_input

            ax_true = fig_pred.add_subplot(1, 2, 1)
            ax_true.imshow(true_output[t], vmin=0, vmax=1, cmap='hot')
            ax_true.set_title(f'Actual (t={step})')
            ax_true.axis('off')

            ax_pred = fig_pred.add_subplot(1, 2, 2)
            pred_image = ax_pred.imshow(pred_output[t], vmin=0, vmax=1, cmap='hot')
            ax_pred.set_title(f'Predicted (t={step})')
            ax_pred.axis('off')

            # Both panels share the 0..1 'hot' scale; attach the bar to the right one.
            fig_pred.colorbar(pred_image, ax=ax_pred)
            fig_pred.tight_layout()
            return []

        anim_pred = animation.FuncAnimation(fig_pred, animate_pred, init_func=clear_pred,
                                            frames=n_output, interval=1000, blit=False)
        anim_pred.save(os.path.join(save_dir, 'prediction_comparison.gif'), writer='pillow')
        plt.close(fig_pred)
# Instantiate the predictor in main() and evaluate its performance
def main():
    """Evaluate the optical-flow baseline on every training simulation.

    Steps: read the metadata CSV, cut each simulation's xi field into sliding
    windows, split the windows 80/20, report averaged metrics for both parts
    and finally render the GIFs for the first test window.

    Relies on the notebook-level ``input_path`` variable for the CSV location.
    The predictor has no fitted parameters, so the train/test split only
    partitions the windows for reporting.
    """
    data_dir = '/content/drive/My Drive/AMLProject/Data/2024-flame-ai-challenge/dataset/train'
    csv_path = os.path.join(input_path,'train.csv')
    predictor = WildfirePredictor(data_dir, pd.read_csv(csv_path))

    # The predictor hands back the same table it was constructed with.
    metadata = predictor.load_metadata()

    # Gather the (input, target) windows of every simulation into one list.
    all_windows = []
    for _, row in metadata.iterrows():
        xi_frames = predictor.load_data(row['xi_filename'])
        all_windows.extend(predictor.create_sliding_windows(xi_frames))

    train_windows, test_windows = train_test_split(
        all_windows, test_size=0.2, random_state=42
    )

    def score_windows(windows):
        """Predict every window and return one metrics dict per window."""
        scores = []
        for inputs, target in windows:
            forecast = predictor.predict_sequence(inputs)
            scores.append(predictor.evaluate_predictions(target, forecast))
        return scores

    train_metrics = score_windows(train_windows)
    test_metrics = score_windows(test_windows)

    # Print the per-window metrics averaged over each split.
    metric_names = ['accuracy', 'f1', 'roc_auc', 'mse']
    reports = (
        ("Training Results:", train_metrics),
        ("\nTesting Results:", test_metrics),
    )
    for heading, scores in reports:
        print(heading)
        for name in metric_names:
            average = np.mean([entry[name] for entry in scores])
            print(f"{name.upper()}: {average:.4f}")

    # NOTE(review): ./results is created but not passed on, so the GIFs land in
    # the default directory of visualize_full_sequence ('./'), which is where
    # the download cell looks for them. Confirm whether ./results was intended.
    save_dir = './results'
    os.makedirs(save_dir, exist_ok=True)
    predictor.visualize_full_sequence(test_windows)

# Run the full evaluation when this cell is executed (in a notebook kernel
# __name__ is "__main__", so main() runs as soon as the cell is evaluated).
if __name__ == "__main__":
    main()

Training Results:
ACCURACY: 0.9656
F1: 0.3418
ROC_AUC: 0.6536
MSE: 0.0344

Testing Results:
ACCURACY: 0.9660
F1: 0.3472
ROC_AUC: 0.6576
MSE: 0.0340


In [None]:
# Download the two GIFs from the current working directory to the local machine.
# These file names match what visualize_full_sequence writes with its default save_dir.
from google.colab import files
files.download('initial_conditions.gif')
files.download('prediction_comparison.gif')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>