Cluster ordering function taken from Ishimoto 2018 Sci Rep

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob

In [2]:
# --- Configuration ---
# Side length of the simulation box; must match the value used by the simulation.
BOX_SIZE = 500.0
# Bin width for distance (resolution): one hundredth of the box side.
DR = BOX_SIZE / 100
# Max distance to calculate correlation for: one quarter of the box side.
R_MAX = BOX_SIZE / 4

In [None]:
def compute_rdf_pbc(coords1, coords2, box_size, r_range, *, exclude_self=False):
    """
    Computes the 2D Radial Distribution Function g(r) with Periodic Boundary Conditions.

    Pair separations are wrapped with the minimum image convention in a square
    box of side ``box_size``. That convention only captures every pair for
    r <= box_size / 2, so bins beyond half the box are under-counted.

    Args:
        coords1: (N, 2) array of positions for reference particles (Type A)
        coords2: (M, 2) array of positions for target particles (Type B)
        box_size: Side length of the simulation box
        r_range: Array of bin edges for distance
        exclude_self: Set to True when coords1 and coords2 are the same set of
            particles. The zero-distance pair of each particle with itself is
            then dropped, and the ideal-gas normalization uses the N - 1
            possible partners of each particle instead of N.

    Returns:
        Tuple (r_centers, g_r): the bin centers and the value of g(r) in each
        bin. Bins whose expected count is zero yield nan/inf rather than raising.

    Raises:
        ValueError: If exclude_self is True but coords1 and coords2 do not
            contain the same number of particles.
    """
    # Accept lists / DataFrame values alike; the arithmetic below needs arrays.
    coords1 = np.asarray(coords1, dtype=float)
    coords2 = np.asarray(coords2, dtype=float)
    r_range = np.asarray(r_range, dtype=float)

    n_ref = len(coords1)
    n_target = len(coords2)
    if exclude_self and n_ref != n_target:
        raise ValueError(
            "exclude_self=True requires coords1 and coords2 to be the same "
            f"particle set, got {n_ref} and {n_target} particles"
        )

    # 1. Calculate vector differences (N x M x 2) using broadcasting
    delta = coords1[:, np.newaxis, :] - coords2[np.newaxis, :, :]

    # 2. Apply Periodic Boundary Conditions (Minimum Image Convention)
    delta = delta - box_size * np.round(delta / box_size)

    # 3. Pairwise distances (N x M)
    dists = np.sqrt(np.sum(delta**2, axis=-1))

    # 4. Flatten, dropping the i == i pairs on the diagonal when requested so
    #    they do not pile up in the first bin as spurious zero distances.
    if exclude_self:
        dists_flat = dists[~np.eye(n_ref, dtype=bool)]
        n_partners = max(n_target - 1, 0)
    else:
        dists_flat = dists.ravel()
        n_partners = n_target

    # 5. Histogram counts
    counts, _ = np.histogram(dists_flat, bins=r_range)

    # 6. Normalization by Ideal Gas Density (Area of shells)
    # Area of 2D shell = pi * (r_outer^2 - r_inner^2)
    areas = np.pi * (r_range[1:]**2 - r_range[:-1]**2)

    # Global density of the partners available to each reference particle
    rho_2 = n_partners / (box_size**2)

    # Expected count if particles were randomly distributed
    # N_ref * Density_target * Area_shell
    expected_counts = n_ref * rho_2 * areas

    # Calculate g(r); a zero expected count gives nan/inf instead of a warning
    with np.errstate(divide='ignore', invalid='ignore'):
        g_r = counts / expected_counts

    # Get bin centers for plotting
    r_centers = (r_range[:-1] + r_range[1:]) / 2
    return r_centers, g_r

def analyze_and_plot(filename, times_to_analyze):
    """Read one parquet file and plot g(r) curves, one panel per requested time.

    For each requested time the recorded 'Time' value closest to it is used.
    Each panel shows the all-particle correlation plus the motile-motile,
    submotile-submotile and motile-submotile correlations, subject to the
    minimum particle counts listed in the curve table below.

    Args:
        filename: Path of the parquet file; it is read with columns
            'Time', 'Type', 'Pos_X' and 'Pos_Y'.
        times_to_analyze: Sequence of target times, one subplot each.
    """
    print(f"--- Analyzing: {filename} ---")
    frame = pd.read_parquet(filename)

    # Distance bin edges shared by every curve
    edges = np.arange(0, R_MAX + DR, DR)

    # One panel per requested time; squeeze=False always yields a 2D grid,
    # so the single row can be taken uniformly even for one panel.
    n_panels = len(times_to_analyze)
    fig, grid = plt.subplots(
        1, n_panels, figsize=(6 * n_panels, 5), sharey=True, squeeze=False
    )
    panels = grid[0]

    # The set of recorded times does not change between panels
    recorded_times = frame['Time'].unique()

    for ax, requested in zip(panels, times_to_analyze):
        # Snap the requested time to the closest recorded one
        nearest = recorded_times[np.argmin(np.abs(recorded_times - requested))]
        snapshot = frame[frame['Time'] == nearest]

        # Positions per particle type, and for everything together
        kinds = snapshot['Type']
        motile_xy = snapshot[kinds == 'motile'][['Pos_X', 'Pos_Y']].values
        sub_xy = snapshot[kinds == 'submotile'][['Pos_X', 'Pos_Y']].values
        all_xy = snapshot[['Pos_X', 'Pos_Y']].values

        # (reference, target, count both sets must exceed, exclude_self, line style)
        curves = (
            # All-to-all: useful for homogeneous simulations
            (all_xy, all_xy, 0, True,
             dict(label='All Particles', color='black', alpha=0.5, linestyle='--')),
            # Same-type correlations (for heterogeneous simulations)
            (motile_xy, motile_xy, 10, True,
             dict(label='Motile-Motile', color='C0')),
            (sub_xy, sub_xy, 10, True,
             dict(label='Sub-Sub', color='C1')),
            # Cross correlation between the two types
            (motile_xy, sub_xy, 0, False,
             dict(label='Motile-Sub', color='purple')),
        )
        for ref_xy, target_xy, min_count, skip_self, line_style in curves:
            if len(ref_xy) > min_count and len(target_xy) > min_count:
                r_mid, g_vals = compute_rdf_pbc(
                    ref_xy, target_xy, BOX_SIZE, edges, exclude_self=skip_self
                )
                ax.plot(r_mid, g_vals, **line_style)

        ax.set_title(f"Time $\\approx$ {nearest}")
        ax.set_xlabel("Distance $r$")
        ax.set_ylabel("$g(r)$")
        # g(r) = 1 reference line
        ax.axhline(1.0, color='gray', linestyle=':', alpha=0.5)
        ax.grid(True, alpha=0.3)
        ax.legend()

    plt.suptitle(f"File: {filename}", fontsize=14)
    plt.tight_layout()
    plt.show()

# --- Main Execution Loop ---
# Collect every parquet file in the working directory, in sorted name order
simulation_files = sorted(glob.glob("*.parquet"))

# Time points to inspect (e.g., Early, Middle, Late);
# adjust these to the length of the simulation
time_points = [10.0, 50.0, 100.0]

if simulation_files:
    for f in simulation_files:
        analyze_and_plot(f, time_points)
else:
    print("No parquet files found! Make sure to run the simulation first.")