In [2]:
import os
import numpy as np
import json
from datetime import datetime, timedelta
import h5py
from glob import glob

from param import output

# Root of the testing dataset; the input folders below share this prefix
data_dir = '../Datasets/Testing/'
samples_data_dir = '../Datasets/Testing/Samples/'
processed_data_dir = '../Datasets/Testing/Processed/'

# Intermediate results go to <data_dir>/TemporalStacks, created on demand
output_dir = os.path.join(data_dir, 'TemporalStacks')
os.makedirs(output_dir, exist_ok=True)

In [3]:
def load_geojson_dates(samples_data_dir, print_loading=False):
    """
    Read event dates from the newest GeoJSON file in a samples directory

    Args:
        samples_data_dir (str): Directory searched (non-recursively) for *.geojson files
        print_loading (bool): When True, print the path of the file being loaded

    Returns:
        dict: Maps each plot ID (the feature's 'name' with 'PLOT-' removed and
            surrounding whitespace stripped) to its 'img_date' parsed as a datetime

    Raises:
        FileNotFoundError: If the directory contains no .geojson files
        KeyError: If 'features', 'properties', 'name' or 'img_date' is missing
        ValueError: If an 'img_date' value is not in YYYY-MM-DD format
    """
    sample_files = glob(os.path.join(samples_data_dir, '*.geojson'))
    if not sample_files:
        raise FileNotFoundError("No .geojson files found in the Samples directory.")

    # NOTE(review): getctime is the creation time on Windows but the last
    # metadata-change time on Unix, so "latest" is platform dependent — confirm
    # this is the intended selection rule.
    latest_file = max(sample_files, key=os.path.getctime)
    if print_loading:
        print(f"Loading events from {latest_file}")

    # GeoJSON is UTF-8 by specification; decode explicitly instead of relying
    # on the platform's default text encoding
    with open(latest_file, encoding='utf-8') as f:
        data = json.load(f)

    # Extract dates and convert to datetime objects
    event_dates = {}
    for feature in data['features']:
        properties = feature['properties']
        # Plot IDs are stored without the 'PLOT-' prefix
        plot_id = properties['name'].replace('PLOT-', '').strip()
        event_dates[plot_id] = datetime.strptime(properties['img_date'], '%Y-%m-%d')

    return event_dates

# Parse the newest sampled-events file, echoing which file was picked
event_dates = load_geojson_dates(samples_data_dir=samples_data_dir, print_loading=True)

Loading events from ../Datasets/Testing/Samples/sampled_events_20241216_153135.geojson


In [4]:
def load_image_patches_with_dates(processed_data_dir):
    """
    Load image patches and their corresponding dates from .npy files

    File names are expected to look like "20180726T084009_PLOT-00001.npy":
    a timestamp, an underscore, then the plot part. Only the first underscore
    is treated as the separator, so the plot part may itself contain underscores.

    Args:
        processed_data_dir (str): Path to directory containing processed .npy files

    Returns:
        dict: Dictionary mapping plot IDs (plot part with 'PLOT-' removed) to
            lists of (patch, date) tuples, each list sorted by date

    Raises:
        ValueError: If a .npy file name has no "_" separator, or its timestamp
            part does not match the %Y%m%dT%H%M%S format
    """
    # Dictionary to store patches by plot ID
    plot_data = {}

    for file in sorted(glob(os.path.join(processed_data_dir, '*.npy'))):
        # Strip the extension only at the end of the name, not wherever
        # the text ".npy" happens to occur
        stem = os.path.splitext(os.path.basename(file))[0]

        # The timestamp never contains "_", so the first underscore is the boundary
        datetime_str, separator, plot_part = stem.partition('_')
        if not separator:
            raise ValueError(
                f"Expected a '<timestamp>_PLOT-<id>.npy' file name, got: {file}"
            )
        plot_id = plot_part.replace('PLOT-', '')

        # Format: "20180726T084009" -> datetime
        date = datetime.strptime(datetime_str, '%Y%m%dT%H%M%S')

        # Load the patch data and file it under its plot
        patch = np.load(file)
        plot_data.setdefault(plot_id, []).append((patch, date))

    # Sort patches by date for each plot
    for patches in plot_data.values():
        patches.sort(key=lambda item: item[1])

    return plot_data

# Read every processed patch from disk, grouped by plot ID
plot_data = load_image_patches_with_dates(processed_data_dir=processed_data_dir)

In [5]:
import os
import numpy as np
import json
from datetime import datetime
from pathlib import Path

def create_temporal_stacks(plot_data, event_dates, output_base_dir):
    """
    Write each plot's patches to disk, split into pre- and post-event stacks

    For every plot ID found in both mappings, a "PLOT-<id>" folder with
    "Pre-event" and "Post-event" subfolders is created under output_base_dir.
    Patches dated strictly before the event date go to "Pre-event", all others
    to "Post-event". Each patch is saved as "<YYYYmmddTHHMMSS>.npy", and each
    subfolder receives its metadata through save_metadata. Plot IDs that have
    an event date but no patches are skipped.

    Args:
        plot_data: Dictionary of {plot_id: [(patch, date)]}
        event_dates: Dictionary of {plot_id: event_date}
        output_base_dir: Base directory for output
    """
    root = Path(output_base_dir)

    for plot_id in event_dates:
        if plot_id not in plot_data:
            continue
        event_date = event_dates[plot_id]

        # Stack label -> (target folder, patches assigned so far).
        # Insertion order (Pre, then Post) fixes the order of mkdir and metadata writes.
        plot_root = root / f"PLOT-{plot_id}"
        stacks = {
            "Pre": (plot_root / "Pre-event", []),
            "Post": (plot_root / "Post-event", []),
        }
        for folder, _ in stacks.values():
            folder.mkdir(parents=True, exist_ok=True)

        # Route every patch to its stack and store it under its timestamp
        for image, acquired in plot_data[plot_id]:
            label = "Pre" if acquired < event_date else "Post"
            folder, members = stacks[label]
            members.append((image, acquired))
            np.save(folder / f"{acquired.strftime('%Y%m%dT%H%M%S')}.npy", image)

        # One metadata file per stack
        for label, (folder, members) in stacks.items():
            save_metadata(folder, members, event_date, label)

def save_metadata(directory, patches, event_date, stack_type):
    """
    Write a stack_info.json summary of one stack into its directory

    Args:
        directory: Path-like supporting the "/" operator; receives stack_info.json
        patches: List of (patch, date) tuples belonging to the stack
        event_date: Event datetime, stored as YYYY-MM-DD
        stack_type: Label stored verbatim under "stack_type"

    The file also records the number of images, each image date as YYYY-MM-DD
    (time of day is dropped), and the shape of the first patch (null when the
    stack is empty).
    """
    # All patches are summarized by the shape of the first one
    if patches:
        patch_shape = patches[0][0].shape
    else:
        patch_shape = None

    image_dates = []
    for _, acquired in patches:
        image_dates.append(acquired.strftime('%Y-%m-%d'))

    summary = {
        "num_images": len(patches),
        "event_date": event_date.strftime('%Y-%m-%d'),
        "stack_type": stack_type,
        "image_dates": image_dates,
        "shape": patch_shape,
    }

    info_path = directory / "stack_info.json"
    with open(info_path, "w") as f:
        json.dump(summary, f, indent=2)

In [6]:
# Write the pre-/post-event stacks for every plot under output_dir
create_temporal_stacks(
    plot_data=plot_data,
    event_dates=event_dates,
    output_base_dir=output_dir,
)