In [1]:
import sys, math
from collections import namedtuple
import numpy as np
import pandas as pd
from scipy.ndimage import gaussian_filter
import pygame
import random
import lzma
import os
from tqdm import tqdm

# Folder that holds the tracking data; the driver cell at the end of the notebook also writes its output here.
DATA_DIRECTORY = "data/2023_2/"
# LZMA-compressed pickle file that load_data() opens and unpickles.
INPUT_FILE = 'KA050_processed_10cm_5h_20230614.pkl.xz'

def load_data(source_dir, input_file, scale = None, arena_dim = None):
    """
    Read an LZMA-compressed pickle and optionally keep only every n-th row.

    Parameters:
    - source_dir: Directory that contains the file.
    - input_file: File name inside ``source_dir``.
    - scale: Row stride. When truthy, every ``int(scale)``-th row is returned
      (starting with the first); when falsy, all rows are returned.
    - arena_dim: Accepted for interface compatibility; not used by this function.

    Returns:
    - The unpickled object, or its strided ``.iloc`` view when ``scale`` is given.
    """
    archive_path = os.path.join(source_dir, input_file)

    # NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
    with lzma.open(archive_path) as handle:
        frame = pd.read_pickle(handle)

    if not scale:
        return frame

    stride = int(scale)
    return frame.iloc[::stride]


def process_data(data, arena_dim):
    """
    Fit the tracked positions to an arena of size ``arena_dim``.

    The work is delegated to helpers that are not defined in the visible part
    of this notebook: the bounding box of ``data`` is turned into a circle, a
    translation and scale mapping that circle onto ``arena_dim`` are derived,
    and the transform is applied to ``data``.

    Parameters:
    - data: Position data handed to the helpers (presumably the DataFrame
      returned by load_data -- TODO confirm).
    - arena_dim: Target arena description forwarded to ``circle_transformation``;
      its exact format is not visible here.

    Returns:
    - The same ``data`` object that was passed in.
      NOTE(review): the return value of ``apply_transform_scale`` is discarded,
      so this relies on that helper modifying ``data`` in place -- confirm.
    """
    arena_bb = find_bounding_box(data)
    origin_arena = calculate_circle(*arena_bb)

    translation, scale = circle_transformation(origin_arena, arena_dim)

    apply_transform_scale(data, translation, scale)

    return data

# Load the full recording; scale keeps its default of None, so no rows are skipped.
data = load_data(DATA_DIRECTORY, INPUT_FILE)

pygame 2.5.2 (SDL 2.28.3, Python 3.11.9)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Inspect what is stored under key 0; the displayed result has 'x' and 'y' columns
# (presumably the track of the first ant -- TODO confirm).
data[0]

Unnamed: 0,x,y
0,180.0,225.0
1,180.0,225.0
2,180.0,225.0
3,180.0,224.0
4,180.0,224.0
...,...,...
863998,522.0,418.0
863999,522.0,418.0
864000,522.0,418.0
864001,522.0,418.0


In [3]:
def calculate_pheromone_intensity_time_series_array_discrete(
    data: pd.DataFrame,
    grid_size: tuple = (900, 900),
    coarse_grid_size: tuple = (20, 20),
    bounding_box: int = 5,
    decay_rate: float = 0.01,
    deposition_rate: float = 0.1,
    diffusion_sigma: float = 1,
    scale: bool = True,
    frame_rate: int = 60,
    snapshot_interval_sec: float = 1.0,
    save_to_file: bool = False,
    output_file_path: str = "pheromone_time_series.npy"
) -> np.ndarray:
    """
    Build a time series of coarse pheromone-intensity maps from ant positions.

    For every frame (row of ``data``) the map is decayed, each ant with a usable
    position deposits pheromone around its coarse cell, and the whole map is
    smoothed with a Gaussian filter. A copy of the map is kept every
    ``int(snapshot_interval_sec * frame_rate)`` frames (at least every frame).

    Parameters:
    - data (pd.DataFrame): One row per frame. Columns are either a two-level
      MultiIndex of (ant label, 'x' / 'y') or flat columns alternating x, y per ant
      (a trailing unpaired flat column is ignored).
    - grid_size (tuple): Size of the original (fine) grid as (height, width).
    - coarse_grid_size (tuple): Size of the discretized grid as (height, width);
      must divide ``grid_size`` exactly.
    - bounding_box (int): Edge length of the deposit square in fine-grid units.
      It is converted to coarse cells with the x-axis factor (rounded up, minimum 1);
      the deposit then reaches ``cells // 2`` cells to each side of the ant's cell.
    - decay_rate (float): Fraction of pheromone removed from every cell per frame.
    - deposition_rate (float): Amount added to each covered cell per ant per frame.
    - diffusion_sigma (float): Sigma passed to ``gaussian_filter`` after every frame.
    - scale (bool): If True, divide all snapshots by the maximum over the whole
      series and clip the result to [0, 1].
    - frame_rate (int): Number of frames per second in the data.
    - snapshot_interval_sec (float): Interval in seconds between snapshots.
    - save_to_file (bool): If True, also write the result with ``np.save``.
    - output_file_path (str): Target path used when ``save_to_file`` is True.

    Returns:
    - np.ndarray: float32 array of shape (num_snapshots, coarse_height, coarse_width);
      the first dimension is 0 when the data is shorter than one snapshot interval.

    Raises:
    - ValueError: If the fine grid is not divisible by the coarse grid, or if
      MultiIndex columns lack an 'x' or 'y' sub-level.
    """
    coarse_height, coarse_width = coarse_grid_size
    fine_height, fine_width = grid_size

    if not (fine_height % coarse_height == 0 and fine_width % coarse_width == 0):
        raise ValueError("Fine grid dimensions must be divisible by coarse grid dimensions.")

    factor_y = fine_height / coarse_height
    factor_x = fine_width / coarse_width

    # Number of frames between snapshots; never less than one.
    frames_per_snapshot = max(1, int(snapshot_interval_sec * frame_rate))

    # Resolve, once, the positional (x, y) column pair of every ant.
    if isinstance(data.columns, pd.MultiIndex):
        coordinate_levels = data.columns.get_level_values(1).unique()
        if 'x' not in coordinate_levels or 'y' not in coordinate_levels:
            raise ValueError("DataFrame must have 'x' and 'y' as subcolumn levels.")
        position_of = {label: pos for pos, label in enumerate(data.columns)}
        # Use the real ant labels instead of assuming they are 0..n-1.
        ant_column_positions = [
            (position_of[(ant, 'x')], position_of[(ant, 'y')])
            for ant in data.columns.get_level_values(0).unique()
            if (ant, 'x') in position_of and (ant, 'y') in position_of
        ]
    else:
        # Flat columns are assumed to alternate x, y for consecutive ants.
        ant_column_positions = [(2 * k, 2 * k + 1) for k in range(len(data.columns) // 2)]

    # Deposit half-width in coarse cells (loop invariant).
    bounding_box_coarse = max(1, math.ceil(bounding_box / factor_x))
    half_box = bounding_box_coarse // 2

    pheromone_map = np.zeros(coarse_grid_size, dtype=np.float32)
    snapshots = []

    # itertuples yields plain positional tuples, which avoids building a Series
    # per row; the frame counter is independent of the DataFrame's index labels.
    frames = data.itertuples(index=False, name=None)
    progress = tqdm(frames, total=data.shape[0], desc="Processing Frames")
    for frame_number, values in enumerate(progress, start=1):
        # Decay pheromone
        pheromone_map *= (1 - decay_rate)

        for x_pos, y_pos in ant_column_positions:
            try:
                x_fine = float(values[x_pos])
                y_fine = float(values[y_pos])
            except (ValueError, TypeError):
                # Non-numeric entry: skip this ant for this frame.
                continue

            # Missing (NaN) or infinite positions cannot be mapped to a cell.
            if not (math.isfinite(x_fine) and math.isfinite(y_fine)):
                continue

            # Map fine coordinates to coarse grid
            x_coarse = int(x_fine // factor_x)
            y_coarse = int(y_fine // factor_y)

            if not (0 <= x_coarse < coarse_width and 0 <= y_coarse < coarse_height):
                continue

            # Clip the deposit square to the grid and add the pheromone.
            x_start = max(x_coarse - half_box, 0)
            x_end = min(x_coarse + half_box + 1, coarse_width)
            y_start = max(y_coarse - half_box, 0)
            y_end = min(y_coarse + half_box + 1, coarse_height)
            pheromone_map[y_start:y_end, x_start:x_end] += deposition_rate

        # Diffuse pheromones to simulate spreading
        pheromone_map = gaussian_filter(pheromone_map, sigma=diffusion_sigma)

        if frame_number % frames_per_snapshot == 0:
            snapshots.append(pheromone_map.copy())

    if snapshots:
        pheromone_time_series = np.array(snapshots, dtype=np.float32)
    else:
        # Keep the documented 3-D shape even when nothing was captured.
        pheromone_time_series = np.zeros((0, coarse_height, coarse_width), dtype=np.float32)

    if scale and pheromone_time_series.size:
        global_max = pheromone_time_series.max()
        if global_max > 0:
            pheromone_time_series /= global_max
        pheromone_time_series = np.clip(pheromone_time_series, 0, 1)

    if save_to_file:
        np.save(output_file_path, pheromone_time_series)
        print(f"Pheromone time series saved to '{output_file_path}'")

    return pheromone_time_series


In [4]:
def calculate_pheromone_intensity_time_series_array_discrete_save(
    data: pd.DataFrame,
    grid_size: tuple = (900, 900),
    coarse_grid_size: tuple = (20, 20),
    bounding_box: int = 5,
    decay_rate: float = 0.01,
    deposition_rate: float = 0.1,
    diffusion_sigma: float = 1,
    scale: bool = True,
    frame_rate: int = 60,
    snapshot_interval_sec: float = 1.0,
    save_to_file: bool = True,
    output_file_path: str = "pheromone_time_series_discrete.npy"
) -> "np.ndarray | None":
    """
    Compute discretized pheromone snapshots and stream them into a memory-mapped file.

    For every frame (row of ``data``) the map is decayed, each ant with a usable
    position deposits pheromone around its coarse cell, and the map is smoothed
    with a Gaussian filter. A snapshot is stored every
    ``int(snapshot_interval_sec * frame_rate)`` frames (at least every frame).

    Parameters:
    - data (pd.DataFrame): One row per frame. Columns are either a two-level
      MultiIndex of (ant label, 'x' / 'y') or flat columns alternating x, y per ant.
    - grid_size (tuple): Size of the original (fine) grid as (height, width).
    - coarse_grid_size (tuple): Size of the discretized grid as (height, width);
      must divide ``grid_size`` exactly.
    - bounding_box (int): Edge length of the deposit square in fine-grid units.
      It is converted to coarse cells with the x-axis factor (rounded up, minimum 1);
      the deposit then reaches ``cells // 2`` cells to each side of the ant's cell.
    - decay_rate (float): Fraction of pheromone removed from every cell per frame.
    - deposition_rate (float): Amount added to each covered cell per ant per frame.
    - diffusion_sigma (float): Sigma passed to ``gaussian_filter`` after every frame.
    - scale (bool): If True, divide all snapshots by the maximum over the whole
      series (one common factor) and clip the result to [0, 1].
    - frame_rate (int): Number of frames per second in the data.
    - snapshot_interval_sec (float): Interval in seconds between snapshots.
    - save_to_file (bool): If True, write the snapshots to ``output_file_path``
      through ``np.memmap``; if False, keep them in memory and return them.
    - output_file_path (str): Target file for the memmap.
      NOTE: ``np.memmap`` writes raw float32 values without a .npy header, so the
      file must be read back with ``np.memmap``/``np.fromfile`` and the known shape
      (num_snapshots, coarse_height, coarse_width) -- ``np.load`` cannot read it,
      whatever the file extension.

    Returns:
    - None when ``save_to_file`` is True; otherwise a float32 array of shape
      (num_snapshots, coarse_height, coarse_width).

    Raises:
    - ValueError: If the fine grid is not divisible by the coarse grid, or if
      MultiIndex columns lack an 'x' or 'y' sub-level.
    """
    # Grid geometry is needed on both the save and the in-memory path.
    coarse_height, coarse_width = coarse_grid_size
    fine_height, fine_width = grid_size

    if not (fine_height % coarse_height == 0 and fine_width % coarse_width == 0):
        raise ValueError("Fine grid dimensions must be divisible by coarse grid dimensions.")

    factor_y = fine_height / coarse_height
    factor_x = fine_width / coarse_width

    # Number of frames between snapshots; never less than one.
    frames_per_snapshot = max(1, int(snapshot_interval_sec * frame_rate))
    num_frames = data.shape[0]
    num_snapshots = num_frames // frames_per_snapshot

    # Resolve, once, the positional (x, y) column pair of every ant.
    if isinstance(data.columns, pd.MultiIndex):
        coordinate_levels = data.columns.get_level_values(1).unique()
        if 'x' not in coordinate_levels or 'y' not in coordinate_levels:
            raise ValueError("DataFrame must have 'x' and 'y' as subcolumn levels.")
        position_of = {label: pos for pos, label in enumerate(data.columns)}
        # Use the real ant labels instead of assuming they are 0..n-1.
        ant_column_positions = [
            (position_of[(ant, 'x')], position_of[(ant, 'y')])
            for ant in data.columns.get_level_values(0).unique()
            if (ant, 'x') in position_of and (ant, 'y') in position_of
        ]
    else:
        # Flat columns are assumed to alternate x, y for consecutive ants.
        ant_column_positions = [(2 * k, 2 * k + 1) for k in range(len(data.columns) // 2)]

    # Deposit half-width in coarse cells (loop invariant).
    bounding_box_coarse = max(1, math.ceil(bounding_box / factor_x))
    half_box = bounding_box_coarse // 2

    snapshot_shape = (num_snapshots, coarse_height, coarse_width)
    if save_to_file:
        # Memory-mapped target so snapshots go to disk as they are produced.
        snapshots = np.memmap(output_file_path, dtype='float32', mode='w+', shape=snapshot_shape)
    else:
        snapshots = np.zeros(snapshot_shape, dtype=np.float32)

    pheromone_map = np.zeros(coarse_grid_size, dtype=np.float32)
    global_max = 0.0
    snapshot_counter = 0

    with tqdm(total=num_frames, desc="Processing Frames") as pbar:
        # The frame counter is positional, so it is independent of the
        # DataFrame's index labels and fires exactly num_snapshots times.
        frames = data.itertuples(index=False, name=None)
        for frame_number, values in enumerate(frames, start=1):
            # Decay pheromone
            pheromone_map *= (1 - decay_rate)

            for x_pos, y_pos in ant_column_positions:
                try:
                    x_fine = float(values[x_pos])
                    y_fine = float(values[y_pos])
                except (ValueError, TypeError):
                    # Non-numeric entry: skip this ant for this frame.
                    continue

                # Missing (NaN) or infinite positions cannot be mapped to a cell.
                if not (math.isfinite(x_fine) and math.isfinite(y_fine)):
                    continue

                x_coarse = int(x_fine // factor_x)
                y_coarse = int(y_fine // factor_y)

                if not (0 <= x_coarse < coarse_width and 0 <= y_coarse < coarse_height):
                    continue

                # Clip the deposit square to the grid and add the pheromone.
                x_start = max(x_coarse - half_box, 0)
                x_end = min(x_coarse + half_box + 1, coarse_width)
                y_start = max(y_coarse - half_box, 0)
                y_end = min(y_coarse + half_box + 1, coarse_height)
                pheromone_map[y_start:y_end, x_start:x_end] += deposition_rate

            # Diffuse pheromones to simulate spreading
            pheromone_map = gaussian_filter(pheromone_map, sigma=diffusion_sigma)

            if frame_number % frames_per_snapshot == 0:
                # Store the unscaled map; scaling needs the maximum of the
                # complete series, which is only known after the loop.
                snapshots[snapshot_counter] = pheromone_map
                snapshot_counter += 1
                global_max = max(global_max, float(pheromone_map.max()))

            pbar.update(1)

    if scale:
        # One common factor for all snapshots keeps them comparable over time.
        if global_max > 0:
            np.divide(snapshots, global_max, out=snapshots)
        np.clip(snapshots, 0, 1, out=snapshots)

    if save_to_file:
        # Flush changes to disk and drop the mapping so the file is released.
        snapshots.flush()
        del snapshots
        print(f"Pheromone time series saved to '{output_file_path}' as a NumPy memmap file.")
        return None

    return snapshots


In [5]:
def calculate_pheromone_intensity_time_series_save_numpy(
    data: pd.DataFrame,
    grid_size: tuple = (900, 900),
    coarse_grid_size: tuple = (20, 20),
    bounding_box: int = 5,
    decay_rate: float = 0.01,
    deposition_rate: float = 0.1,
    diffusion_sigma: float = 1,
    scale: bool = True,
    frame_rate: int = 60,
    snapshot_interval_sec: float = 1.0,
    save_to_file: bool = True,
    output_file_path: str = "pheromone_time_series_discrete.npy"
) -> None:
    """
    Compute discretized pheromone snapshots and save them as a standard .npy file.

    For every frame (row of ``data``) the map is decayed, each ant with a usable
    position deposits pheromone around its coarse cell, and the map is smoothed
    with a Gaussian filter. A snapshot is stored every
    ``int(snapshot_interval_sec * frame_rate)`` frames (at least every frame).
    The saved array is float32 with shape (num_snapshots, coarse_height, coarse_width).

    Parameters:
    - data (pd.DataFrame): One row per frame. Columns are either a two-level
      MultiIndex of (ant label, 'x' / 'y') or flat columns alternating x, y per ant.
    - grid_size (tuple): Size of the original (fine) grid as (height, width).
    - coarse_grid_size (tuple): Size of the discretized grid as (height, width);
      must divide ``grid_size`` exactly.
    - bounding_box (int): Edge length of the deposit square in fine-grid units.
      It is converted to coarse cells with the x-axis factor (rounded up, minimum 1);
      the deposit then reaches ``cells // 2`` cells to each side of the ant's cell.
    - decay_rate (float): Fraction of pheromone removed from every cell per frame.
    - deposition_rate (float): Amount added to each covered cell per ant per frame.
    - diffusion_sigma (float): Sigma passed to ``gaussian_filter`` after every frame.
    - scale (bool): If True, divide all snapshots by the maximum over the whole
      series (one common factor) and clip the result to [0, 1].
    - frame_rate (int): Number of frames per second in the data.
    - snapshot_interval_sec (float): Interval in seconds between snapshots.
    - save_to_file (bool): If False the function returns immediately without
      computing anything.
    - output_file_path (str): Path handed to ``np.save``.

    Returns:
    - None

    Raises:
    - ValueError: If the fine grid is not divisible by the coarse grid, or if
      MultiIndex columns lack an 'x' or 'y' sub-level.
    """
    if not save_to_file:
        # Nothing is returned by this variant, so there is nothing to do.
        return None

    coarse_height, coarse_width = coarse_grid_size
    fine_height, fine_width = grid_size

    if not (fine_height % coarse_height == 0 and fine_width % coarse_width == 0):
        raise ValueError("Fine grid dimensions must be divisible by coarse grid dimensions.")

    factor_y = fine_height / coarse_height
    factor_x = fine_width / coarse_width

    # Number of frames between snapshots; never less than one.
    frames_per_snapshot = max(1, int(snapshot_interval_sec * frame_rate))
    num_frames = data.shape[0]
    num_snapshots = num_frames // frames_per_snapshot

    # Preallocate the pheromone time series array
    pheromone_time_series = np.zeros((num_snapshots, coarse_height, coarse_width), dtype='float32')

    # Resolve, once, the positional (x, y) column pair of every ant.
    if isinstance(data.columns, pd.MultiIndex):
        coordinate_levels = data.columns.get_level_values(1).unique()
        if 'x' not in coordinate_levels or 'y' not in coordinate_levels:
            raise ValueError("DataFrame must have 'x' and 'y' as subcolumn levels.")
        position_of = {label: pos for pos, label in enumerate(data.columns)}
        # Use the real ant labels instead of assuming they are 0..n-1.
        ant_column_positions = [
            (position_of[(ant, 'x')], position_of[(ant, 'y')])
            for ant in data.columns.get_level_values(0).unique()
            if (ant, 'x') in position_of and (ant, 'y') in position_of
        ]
    else:
        # Flat columns are assumed to alternate x, y for consecutive ants.
        ant_column_positions = [(2 * k, 2 * k + 1) for k in range(len(data.columns) // 2)]

    # Deposit half-width in coarse cells (loop invariant).
    bounding_box_coarse = max(1, math.ceil(bounding_box / factor_x))
    half_box = bounding_box_coarse // 2

    pheromone_map = np.zeros(coarse_grid_size, dtype=np.float32)
    global_max = 0.0
    snapshot_counter = 0

    with tqdm(total=num_frames, desc="Processing Frames") as pbar:
        # The frame counter is positional, so it is independent of the
        # DataFrame's index labels and fires exactly num_snapshots times.
        frames = data.itertuples(index=False, name=None)
        for frame_number, values in enumerate(frames, start=1):
            pheromone_map *= (1 - decay_rate)

            for x_pos, y_pos in ant_column_positions:
                try:
                    x_fine = float(values[x_pos])
                    y_fine = float(values[y_pos])
                except (ValueError, TypeError):
                    # Non-numeric entry: skip this ant for this frame.
                    continue

                # Missing (NaN) or infinite positions cannot be mapped to a cell.
                if not (math.isfinite(x_fine) and math.isfinite(y_fine)):
                    continue

                x_coarse = int(x_fine // factor_x)
                y_coarse = int(y_fine // factor_y)

                if not (0 <= x_coarse < coarse_width and 0 <= y_coarse < coarse_height):
                    continue

                # Clip the deposit square to the grid and add the pheromone.
                x_start = max(x_coarse - half_box, 0)
                x_end = min(x_coarse + half_box + 1, coarse_width)
                y_start = max(y_coarse - half_box, 0)
                y_end = min(y_coarse + half_box + 1, coarse_height)
                pheromone_map[y_start:y_end, x_start:x_end] += deposition_rate

            pheromone_map = gaussian_filter(pheromone_map, sigma=diffusion_sigma)

            if frame_number % frames_per_snapshot == 0:
                # Store the unscaled map; scaling needs the maximum of the
                # complete series, which is only known after the loop.
                pheromone_time_series[snapshot_counter] = pheromone_map
                snapshot_counter += 1
                global_max = max(global_max, float(pheromone_map.max()))

            pbar.update(1)

    if scale:
        # One common factor for all snapshots keeps them comparable over time.
        if global_max > 0:
            pheromone_time_series /= global_max
        np.clip(pheromone_time_series, 0, 1, out=pheromone_time_series)

    # Save the pheromone time series using numpy.save()
    np.save(output_file_path, pheromone_time_series)
    print(f"Pheromone time series saved to '{output_file_path}' as a .npy file.")


In [6]:
import h5py

def calculate_pheromone_intensity_time_series_save_hdf5(
    data: pd.DataFrame,
    grid_size: tuple = (900, 900),
    coarse_grid_size: tuple = (20, 20),
    bounding_box: int = 5,
    decay_rate: float = 0.01,
    deposition_rate: float = 0.1,
    diffusion_sigma: float = 1,
    scale: bool = True,
    frame_rate: int = 60,
    snapshot_interval_sec: float = 1.0,
    save_to_file: bool = True,
    output_file_path: str = "pheromone_time_series_discrete.h5"
) -> None:
    """
    Compute discretized pheromone snapshots and write them to an HDF5 file using h5py.

    For every frame (row of ``data``) the map is decayed and each ant with a
    usable position deposits pheromone around its coarse cell. A snapshot is
    written every ``int(snapshot_interval_sec * frame_rate)`` frames (at least
    every frame) into the float32 dataset "pheromone_time_series" of shape
    (num_snapshots, coarse_height, coarse_width).

    Parameters:
    - data (pd.DataFrame): One row per frame. Columns are either a two-level
      MultiIndex of (ant label, 'x' / 'y') or flat columns alternating x, y per ant.
    - grid_size (tuple): Size of the original (fine) grid as (height, width).
    - coarse_grid_size (tuple): Size of the discretized grid as (height, width);
      must divide ``grid_size`` exactly.
    - bounding_box (int): Edge length of the deposit square in fine-grid units.
      It is converted to coarse cells with the x-axis factor (rounded up, minimum 1);
      the deposit then reaches ``cells // 2`` cells to each side of the ant's cell.
    - decay_rate (float): Fraction of pheromone removed from every cell per frame.
    - deposition_rate (float): Amount added to each covered cell per ant per frame.
    - diffusion_sigma (float): Currently unused -- the Gaussian diffusion step is
      disabled in this variant (see the note inside the frame loop).
    - scale (bool): If True, divide all snapshots by the maximum over the whole
      series (one common factor) and clip the result to [0, 1].
    - frame_rate (int): Number of frames per second in the data.
    - snapshot_interval_sec (float): Interval in seconds between snapshots.
    - save_to_file (bool): If False the function returns immediately without
      computing anything.
    - output_file_path (str): HDF5 file to create (an existing file is overwritten).

    Returns:
    - None

    Raises:
    - ValueError: If the fine grid is not divisible by the coarse grid, or if
      MultiIndex columns lack an 'x' or 'y' sub-level.
    """
    if not save_to_file:
        # Nothing is returned by this variant, so there is nothing to do.
        return None

    coarse_height, coarse_width = coarse_grid_size
    fine_height, fine_width = grid_size

    if not (fine_height % coarse_height == 0 and fine_width % coarse_width == 0):
        raise ValueError("Fine grid dimensions must be divisible by coarse grid dimensions.")

    factor_y = fine_height / coarse_height
    factor_x = fine_width / coarse_width

    # Number of frames between snapshots; never less than one.
    frames_per_snapshot = max(1, int(snapshot_interval_sec * frame_rate))
    num_frames = data.shape[0]
    num_snapshots = num_frames // frames_per_snapshot

    # Number of snapshots rewritten at a time during the final scaling pass.
    rescale_chunk = 1024

    with h5py.File(output_file_path, 'w') as h5f:
        # Fixed-size dataset: maxshape equals shape, so it cannot be extended later.
        dset = h5f.create_dataset(
            "pheromone_time_series",
            shape=(num_snapshots, coarse_height, coarse_width),
            maxshape=(num_snapshots, coarse_height, coarse_width),
            dtype='float32'
        )

        # Resolve, once, the positional (x, y) column pair of every ant.
        if isinstance(data.columns, pd.MultiIndex):
            coordinate_levels = data.columns.get_level_values(1).unique()
            if 'x' not in coordinate_levels or 'y' not in coordinate_levels:
                raise ValueError("DataFrame must have 'x' and 'y' as subcolumn levels.")
            position_of = {label: pos for pos, label in enumerate(data.columns)}
            # Use the real ant labels instead of assuming they are 0..n-1.
            ant_column_positions = [
                (position_of[(ant, 'x')], position_of[(ant, 'y')])
                for ant in data.columns.get_level_values(0).unique()
                if (ant, 'x') in position_of and (ant, 'y') in position_of
            ]
        else:
            # Flat columns are assumed to alternate x, y for consecutive ants.
            ant_column_positions = [(2 * k, 2 * k + 1) for k in range(len(data.columns) // 2)]

        # Deposit half-width in coarse cells (loop invariant).
        bounding_box_coarse = max(1, math.ceil(bounding_box / factor_x))
        half_box = bounding_box_coarse // 2

        pheromone_map = np.zeros(coarse_grid_size, dtype=np.float32)
        global_max = 0.0
        snapshot_counter = 0

        with tqdm(total=num_frames, desc="Processing Frames") as pbar:
            # The frame counter is positional, so it is independent of the
            # DataFrame's index labels and fires exactly num_snapshots times.
            frames = data.itertuples(index=False, name=None)
            for frame_number, values in enumerate(frames, start=1):
                pheromone_map *= (1 - decay_rate)

                for x_pos, y_pos in ant_column_positions:
                    try:
                        x_fine = float(values[x_pos])
                        y_fine = float(values[y_pos])
                    except (ValueError, TypeError):
                        # Non-numeric entry: skip this ant for this frame.
                        continue

                    # Missing (NaN) or infinite positions cannot be mapped to a cell.
                    if not (math.isfinite(x_fine) and math.isfinite(y_fine)):
                        continue

                    x_coarse = int(x_fine // factor_x)
                    y_coarse = int(y_fine // factor_y)

                    if not (0 <= x_coarse < coarse_width and 0 <= y_coarse < coarse_height):
                        continue

                    # Clip the deposit square to the grid and add the pheromone.
                    x_start = max(x_coarse - half_box, 0)
                    x_end = min(x_coarse + half_box + 1, coarse_width)
                    y_start = max(y_coarse - half_box, 0)
                    y_end = min(y_coarse + half_box + 1, coarse_height)
                    pheromone_map[y_start:y_end, x_start:x_end] += deposition_rate

                # NOTE(review): the diffusion step is disabled in this variant, so
                # diffusion_sigma has no effect here. Confirm whether that is intended
                # before re-enabling:
                # pheromone_map = gaussian_filter(pheromone_map, sigma=diffusion_sigma)

                if frame_number % frames_per_snapshot == 0:
                    # Store the unscaled map; scaling needs the maximum of the
                    # complete series, which is only known after the loop.
                    dset[snapshot_counter] = pheromone_map
                    snapshot_counter += 1
                    global_max = max(global_max, float(pheromone_map.max()))

                pbar.update(1)

        if scale:
            # One common factor for all snapshots keeps them comparable over time.
            # Rewrite the dataset in chunks so the whole series never has to be
            # held in memory at once.
            divisor = global_max if global_max > 0 else 1.0
            for start in range(0, snapshot_counter, rescale_chunk):
                stop = min(start + rescale_chunk, snapshot_counter)
                block = dset[start:stop]
                dset[start:stop] = np.clip(block / divisor, 0, 1)

    print(f"Pheromone time series saved to '{output_file_path}' as an HDF5 file.")


In [None]:
# Write the snapshot series to an HDF5 file inside DATA_DIRECTORY.
# Alternative .npy output name, kept for reference:
# output_file = "pheromone_time_series_discrete.npy"
output_file = "pheromone_time_series_discrete.h5"
calculate_pheromone_intensity_time_series_save_hdf5(
    data=data,
    grid_size=(900, 900),
    coarse_grid_size=(50, 50),  # Discretized to a 50x50 grid (900 / 50 = 18 fine units per cell)
    bounding_box=5,             # Divided by the fine/coarse factor and rounded up inside the function
    decay_rate=0.01,
    deposition_rate=0.1,
    diffusion_sigma=1,          # NOTE: has no effect in the HDF5 variant, whose gaussian_filter call is commented out
    scale=True,
    frame_rate=60,
    snapshot_interval_sec=1.0,  # Capture every 1 second
    save_to_file=True,
    output_file_path=os.path.join(DATA_DIRECTORY, output_file)
)

Processing Frames:  58%|█████▊    | 500634/864003 [06:52<05:14, 1155.04it/s]