In [1]:
import sys, math
from collections import namedtuple
import numpy as np
import pandas as pd
import pygame
import random
import lzma
import os

# Directory that holds the input pickle; the derived pickle is written back here as well.
DATA_DIRECTORY = "data/2023_2/"
# File name (inside DATA_DIRECTORY) of the xz/LZMA-compressed pandas pickle to load.
INPUT_FILE = 'KA050_processed_10cm_5h_20230614.pkl.xz'

def load_data(source_dir, input_file, scale = None, arena_dim = None):
    """
    Load an LZMA-compressed pandas pickle and optionally subsample its rows.

    Parameters:
    - source_dir (str): Directory containing the file.
    - input_file (str): Name of the xz-compressed pickle inside `source_dir`.
    - scale (int | float | None): Row step. When truthy, every `int(scale)`-th row is kept
      (starting with the first row); when falsy (None or 0), all rows are returned.
    - arena_dim: Not used by this function; kept so existing call sites keep working.

    Returns:
    - The unpickled object, or its `.iloc[::int(scale)]` subsample when `scale` is truthy.

    Raises:
    - ValueError: If `scale` is truthy but truncates to 0 (e.g. 0.5), which cannot be used
      as a slice step.
    """
    # Validate the step before the (potentially slow) load so a bad argument fails fast
    # with an actionable message instead of "slice step cannot be zero".
    step = int(scale) if scale else None
    if step == 0:
        raise ValueError(
            f"scale={scale!r} truncates to a row step of 0; pass a value with |scale| >= 1 or None."
        )

    # NOTE(security): unpickling can execute arbitrary code; only load files from a trusted source.
    with lzma.open(os.path.join(source_dir, input_file)) as file:
        data = pd.read_pickle(file)

    return data if step is None else data.iloc[::step]


def process_data(data, arena_dim):
    """
    Fit the tracked positions to an arena of the requested size.

    The bounding box of `data` is turned into a circle, a translation and scale mapping that
    circle onto `arena_dim` are derived, and the transform is applied to `data`.

    Parameters:
    - data: Position table; passed to find_bounding_box and apply_transform_scale.
    - arena_dim: Target arena dimensions, forwarded to circle_transformation.

    Returns:
    - The same `data` object that was passed in.
    """
    arena_bb = find_bounding_box(data)
    origin_arena = calculate_circle(*arena_bb)

    translation, scale = circle_transformation(origin_arena, arena_dim)

    # The helper's return value is not used, so it presumably rewrites `data` in place
    # -- TODO confirm against apply_transform_scale's definition.
    apply_transform_scale(data, translation, scale)

    return data

# Load the full table: `scale` is left at its default, so no row subsampling is applied.
data = load_data(DATA_DIRECTORY, INPUT_FILE)



pygame 2.5.2 (SDL 2.28.3, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Preview the columns stored under top-level column key 0 of the loaded table.
data[0]

Unnamed: 0,x,y
0,180.0,225.0
1,180.0,225.0
2,180.0,225.0
3,180.0,224.0
4,180.0,224.0
...,...,...
863998,522.0,418.0
863999,522.0,418.0
864000,522.0,418.0
864001,522.0,418.0


In [3]:
import pandas as pd
import numpy as np
import math

def add_theta_interleaved(df):
    """
    Calculate a heading angle theta for each individual and insert it right after its 'y' column.

    For every row, theta is the direction from the current position to the position in the
    next row, computed as arctan2(-dy, dx) and wrapped into [0, 2*pi). dy is negated because
    the coordinates follow Pygame's convention (y increases downward). The last row has no
    successor, so its theta is NaN; a step with no displacement yields theta = 0.

    Parameters:
    df (pd.DataFrame): DataFrame containing positional data with MultiIndex columns.
                       The first level represents individual identifiers (e.g., 0, 1, 2, ...),
                       and the second level contains 'x' and 'y' coordinates.
                       The frame is modified in place.

    Returns:
    pd.DataFrame: The same DataFrame object, with an (individual, 'theta') column inserted
                  after each (individual, 'y') column. Individuals lacking 'x' or 'y' are
                  skipped with a printed notice.

    Raises:
    ValueError: If the columns are not a MultiIndex, or (from DataFrame.insert) if an
                (individual, 'theta') column already exists, e.g. when called twice on the
                same frame.
    """
    if not isinstance(df.columns, pd.MultiIndex):
        raise ValueError("DataFrame columns must be a MultiIndex with levels: [individual, coordinate].")

    # Snapshot the individual labels before any insertion changes the column index.
    individuals = df.columns.get_level_values(0).unique()

    for individual in individuals:
        try:
            x = df[individual, 'x']
            y = df[individual, 'y']
        except KeyError:
            print(f"Individual {individual} does not have both 'x' and 'y' columns. Skipping.")
            continue

        # Displacement towards the next frame; shift(-1) leaves NaN in the final row.
        dx = x.shift(-1) - x
        dy = y.shift(-1) - y

        # Negate dy for Pygame's downward-growing y axis, then wrap into [0, 2*pi).
        theta_normalized = np.arctan2(-dy, dx) % (2 * math.pi)

        # The 'y' column is known to exist (the lookup above succeeded), so its position
        # can be resolved directly. It is recomputed each iteration because earlier
        # insertions shift the positions of later columns.
        y_col_index = list(df.columns).index((individual, 'y'))

        # DataFrame.insert accepts a tuple label for MultiIndex columns and modifies df in place.
        df.insert(y_col_index + 1, (individual, 'theta'), theta_normalized)

    return df


# data_with_angle = add_theta_interleaved(data)

In [4]:
def add_smoothed_theta(df, window_size=20, smoothed_suffix='smoothed_theta'):
    """
    Add a smoothed theta column for each individual using a sliding-window circular mean.

    Angles are averaged through their sine and cosine components so that values on either
    side of the 0 / 2*pi wrap-around average correctly. Missing theta values are forward-
    then backward-filled before smoothing, so the smoothed column has a value wherever the
    individual has at least one non-missing theta.

    Parameters:
    - df (pd.DataFrame): DataFrame with MultiIndex columns where level 0 is individual IDs and level 1 is 'x','y','theta'.
      The input frame is not modified.
    - window_size (int): Number of frames to include in the trailing sliding window for averaging.
    - smoothed_suffix (str): Level-1 name of the smoothed theta column.

    Returns:
    - pd.DataFrame: A new DataFrame with the original columns plus one smoothed theta column
      placed immediately after each individual's 'theta' column. Individuals without a
      'theta' column are skipped with a printed notice.

    Raises:
    - ValueError: If the columns are not a MultiIndex.
    """
    if not isinstance(df.columns, pd.MultiIndex):
        raise ValueError("DataFrame columns must be a MultiIndex with levels: [individual, coordinate].")

    individuals = df.columns.get_level_values(0).unique()

    # Maps (individual, smoothed_suffix) -> smoothed Series; tuple keys become MultiIndex columns.
    smoothed_theta_data = {}

    for individual in individuals:
        # Membership test on the column index avoids materialising a per-individual sub-frame.
        if (individual, 'theta') not in df.columns:
            print(f"Individual {individual} does not have a 'theta' column. Skipping.")
            continue

        theta = df[individual, 'theta']

        # Fill gaps forward, then backward. Series.ffill()/bfill() replace the deprecated
        # fillna(method=...) spelling and give the same result.
        theta_filled = theta.ffill().bfill()

        # Trailing-window mean of the unit-vector components (min_periods=1 so the first
        # rows are averaged over however many frames are available).
        sin_avg = np.sin(theta_filled).rolling(window=window_size, min_periods=1).mean()
        cos_avg = np.cos(theta_filled).rolling(window=window_size, min_periods=1).mean()

        # Back to an angle, wrapped into [0, 2*pi).
        smoothed_theta_data[(individual, smoothed_suffix)] = np.arctan2(sin_avg, cos_avg) % (2 * math.pi)

    # Build all new columns at once and attach them in a single concat.
    df_smoothed_theta = pd.DataFrame(smoothed_theta_data, index=df.index)
    df_combined = pd.concat([df, df_smoothed_theta], axis=1)

    # Re-order so each smoothed column directly follows its source 'theta' column.
    new_order = []
    for col in df.columns:
        new_order.append(col)
        if col[1] == 'theta':
            smoothed_col = (col[0], smoothed_suffix)
            if smoothed_col in df_combined.columns:
                new_order.append(smoothed_col)

    return df_combined[new_order]

# data_with_avg_angle = add_smoothed_theta(data_with_angle)

In [5]:
def add_theta_and_smoothed_theta(df, window_size=20, smoothed_suffix='smoothed_theta'):
    """
    Calculate theta and smoothed_theta for each individual and interleave them correctly.

    theta is the direction from the current position to the next row's position,
    arctan2(dy, dx) wrapped into [0, 2*pi); the last row has no successor and is NaN, and a
    step with no displacement yields 0. smoothed_theta is a trailing-window circular mean of
    theta (mean of sin/cos, then arctan2), set to NaN wherever theta itself is NaN.

    NOTE(review): unlike add_theta_interleaved, dy is not negated here -- confirm which
    y-axis convention downstream consumers expect.

    Parameters:
    - df (pd.DataFrame): DataFrame with MultiIndex columns [individual, x/y/theta].
      The frame is modified in place (columns are added with DataFrame.insert).
    - window_size (int): Number of frames for the sliding window to compute smoothed_theta.
    - smoothed_suffix (str): Level-1 name of the smoothed_theta column.

    Returns:
    - pd.DataFrame: The same DataFrame object with [x, y, theta, smoothed_theta] for each
      individual. Individuals lacking 'x' or 'y', or already carrying a 'theta' column, are
      skipped with a printed notice.

    Raises:
    - ValueError: If the columns are not a MultiIndex.
    """
    if not isinstance(df.columns, pd.MultiIndex):
        raise ValueError("DataFrame columns must be a MultiIndex with levels: [individual, coordinate].")

    # Snapshot the individual labels before insertions start changing the column index.
    individuals = df.columns.get_level_values(0).unique()

    # Processed last-to-first, as before; insert positions are looked up fresh each time.
    for individual in individuals[::-1]:
        x_col = (individual, 'x')
        y_col = (individual, 'y')
        theta_col = (individual, 'theta')
        smoothed_theta_col = (individual, smoothed_suffix)

        if x_col not in df.columns or y_col not in df.columns:
            print(f"Individual {individual} does not have both 'x' and 'y' columns. Skipping.")
            continue

        # DataFrame.insert refuses duplicate labels. Detect that case explicitly so the
        # notice states the real reason (typically: the function was already run on df).
        if theta_col in df.columns:
            print(f"Individual {individual} already has a 'theta' column. Skipping.")
            continue

        x = df[x_col]
        y = df[y_col]

        # Displacement towards the next frame; shift(-1) leaves NaN in the final row.
        dx = x.shift(-1) - x
        dy = y.shift(-1) - y

        theta = np.arctan2(dy, dx) % (2 * math.pi)
        df.insert(list(df.columns).index(y_col) + 1, theta_col, theta)

        if smoothed_theta_col in df.columns:
            print(f"Individual {individual} already has a '{smoothed_suffix}' column. Skipping smoothed_theta insertion.")
            continue

        # Circular mean over a trailing window: average the unit-vector components.
        # rolling().mean() ignores NaN samples, so windows that merely touch a gap still
        # produce a value ...
        sin_avg = np.sin(theta).rolling(window=window_size, min_periods=1).mean()
        cos_avg = np.cos(theta).rolling(window=window_size, min_periods=1).mean()
        theta_smoothed = np.arctan2(sin_avg, cos_avg) % (2 * math.pi)

        # ... but rows where theta itself is missing must stay missing.
        theta_smoothed = theta_smoothed.where(theta.notna())

        df.insert(list(df.columns).index(theta_col) + 1, smoothed_theta_col, theta_smoothed)

    return df


In [6]:

def add_smoothed_movement(df, window_size=20, smoothed_suffix='smoothed_distance'):
    """
    Adds 'distance' and 'smoothed_distance' columns for each ant in the DataFrame.

    'distance' is the Euclidean distance between an ant's position in a row and its position
    in the previous row (NaN in the first row). The smoothed column is a trailing rolling
    mean of 'distance' that ignores NaN samples.

    Parameters:
    - df (pd.DataFrame): The input DataFrame with MultiIndex columns (ant_number, subcolumns).
      Every ant must have 'x' and 'y' subcolumns. Note that the two new columns are also
      assigned onto this input frame as a side effect.
    - window_size (int): The window size for the rolling average.
    - smoothed_suffix (str): The subcolumn name for the smoothed distance column.

    Returns:
    - pd.DataFrame: A new DataFrame whose columns are, per ant and in this order:
      'x', 'y', 'theta', 'smoothed_theta', 'distance', <smoothed_suffix>. Because the result
      is produced with reindex, any other subcolumns are dropped and a missing 'theta' or
      'smoothed_theta' appears as an all-NaN column.

    Raises:
    - ValueError: If the columns are not a MultiIndex.
    - KeyError: If an ant lacks an 'x' or 'y' subcolumn.
    """
    if not isinstance(df.columns, pd.MultiIndex):
        raise ValueError("The DataFrame must have MultiIndex columns (ant_number, subcolumns).")

    # `levels[0]` alone can still list ants that were sliced or dropped out of the frame;
    # pruning unused levels first keeps the same (sorted) order but only real ants.
    ants = df.columns.remove_unused_levels().levels[0]

    new_columns = []

    for ant in ants:
        x = df[(ant, 'x')]
        y = df[(ant, 'y')]

        # Step length from the previous row; shift(1) makes the first row NaN.
        distance = np.sqrt((x - x.shift(1))**2 + (y - y.shift(1))**2)
        df[(ant, 'distance')] = distance

        # min_periods=1 lets the mean use whatever non-NaN samples the window holds.
        df[(ant, smoothed_suffix)] = distance.rolling(window=window_size, min_periods=1).mean()

        new_columns.extend([
            (ant, 'x'),
            (ant, 'y'),
            (ant, 'theta'),
            (ant, 'smoothed_theta'),
            (ant, 'distance'),
            (ant, smoothed_suffix)
        ])

    new_multiindex = pd.MultiIndex.from_tuples(new_columns, names=df.columns.names)

    # reindex returns a new frame laid out in the fixed per-ant column order.
    return df.reindex(columns=new_multiindex)

In [7]:
# Add theta / smoothed_theta columns. The function inserts the columns into `data` itself and
# returns that same frame, so `df_processed` and `data` refer to one object after this call.
df_processed = add_theta_and_smoothed_theta(data, window_size=20)
# df_processed = add_smoothed_movement(df_processed, window_size=20)
df_processed

  df.insert(y_col_index + 1, theta_col, theta_normalized)
  df.insert(theta_col_index + 1, smoothed_theta_col, theta_smoothed)
  df.insert(y_col_index + 1, theta_col, theta_normalized)


Unnamed: 0_level_0,0,0,0,0,1,1,1,1,2,2,...,54,54,55,55,55,55,56,56,56,56
Unnamed: 0_level_1,x,y,theta,smoothed_theta,x,y,theta,smoothed_theta,x,y,...,theta,smoothed_theta,x,y,theta,smoothed_theta,x,y,theta,smoothed_theta
0,180.0,225.0,0.000000,0.000000,339.0,591.0,0.785398,0.785398,326.0,614.0,...,,,,,,,,,,
1,180.0,225.0,0.000000,0.000000,340.0,592.0,0.000000,0.392699,325.0,614.0,...,,,,,,,,,,
2,180.0,225.0,4.712389,5.819538,340.0,592.0,0.000000,0.255495,325.0,614.0,...,,,,,,,,,,
3,180.0,224.0,0.000000,5.961435,340.0,592.0,0.000000,0.188480,324.0,614.0,...,,,,,,,,,,
4,180.0,224.0,4.712389,5.695183,340.0,592.0,0.000000,0.149106,324.0,614.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
863998,522.0,418.0,0.000000,0.000000,569.0,410.0,0.000000,0.000000,,,...,,,,,,,,,,
863999,522.0,418.0,0.000000,0.000000,569.0,410.0,0.000000,0.000000,,,...,,,,,,,,,,
864000,522.0,418.0,0.000000,0.000000,569.0,410.0,0.000000,0.000000,,,...,,,,,,,,,,
864001,522.0,418.0,0.000000,0.000000,569.0,410.0,0.000000,0.000000,,,...,,,,,,,,,,


In [8]:
# Persist the frame with the added angle columns as an xz-compressed pickle in DATA_DIRECTORY.
df_processed.to_pickle(os.path.join(DATA_DIRECTORY, 'KA050_processed_10cm_5h_20230614_angles.pkl.xz'), compression='xz')
# df_processed.to_pickle(os.path.join(DATA_DIRECTORY, 'KA050_processed_10cm_5h_20230614_smoothed.pkl.xz'), compression='xz')