In [None]:
from ipyfilechooser import FileChooser
from IPython.display import display

# Interactive file picker. The cell below reads the chosen path from
# `fc.selected`, so pick the events parquet file here before running it;
# if nothing is selected that cell falls back to its default file names.
fc = FileChooser()
display(fc)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot appearance: apply the 'seaborn-v0_8' style sheet and the "husl"
# palette first, then override the default figure size and font size so the
# overrides are not reset by the style sheet.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

# File paths
# The two inputs share a base name: "<base>_events.parquet" and
# "<base>_particles.parquet" (e.g. base "pythia_run_output" or "arg_at_5GeV").
# Select the *events* file in the FileChooser above; the particles file is
# looked up next to it. With no selection, fall back to the default
# "pythia_run_output" pair in the working directory.
if fc.selected:
    events_file = fc.selected
    selected_path = Path(fc.selected)
    # Rewrite the marker on the full file *name* (not the stem) so that both
    # the directory and the extension of the selected file are preserved.
    particles_name = selected_path.name.replace("_events.", "_particles.")
    if particles_name == selected_path.name:
        # Without the marker the derived path would equal the events file.
        print(f"Warning: '{selected_path.name}' does not contain '_events.'; "
              "cannot derive the matching particles file name.")
    particles_file = str(selected_path.with_name(particles_name))
else:
    events_file = "pythia_run_output_events.parquet"
    particles_file = "pythia_run_output_particles.parquet"

print("Setting up visualization environment...")
print(f"Events file: {events_file}")
print(f"Particles file: {particles_file}")

# Load the data
# Read both parquet tables and print a short summary. A missing file is
# reported with a hint instead of a traceback. NOTE: in that case this cell
# does not (re)assign the frame(s) that failed to load, so the cells below
# will either raise NameError or reuse frames left over from an earlier run.
try:
    events_df = pd.read_parquet(events_file)
    particles_df = pd.read_parquet(particles_file)
except FileNotFoundError:
    print("Error: Could not find data files. Make sure you have run the pythia_parquet.py script first.")
    print(f"Looking for: {events_file} and {particles_file}")
    print("You may need to adjust the file paths above.")
else:
    print("Data loaded successfully!")
    print(f"Events: {len(events_df)} events")
    print(f"Particles: {len(particles_df)} particles")
    # An empty events table would otherwise raise ZeroDivisionError here.
    if len(events_df):
        print(f"Average particles per event: {len(particles_df)/len(events_df):.1f}")
    else:
        print("Average particles per event: n/a (events file contains no events)")

    # Display basic info about the data
    print("\nEvent-level columns:", list(events_df.columns))
    print("Particle-level columns:", list(particles_df.columns))

In [None]:
# Show the first rows and the summary statistics of each table, with a
# separator line between the event-level and the particle-level output.
summary_targets = (("event", events_df), ("particle", particles_df))
for position, (level, frame) in enumerate(summary_targets):
    if position:
        print("\n" + "=" * 50)
    print(f"Sample {level}-level data:")
    display(frame.head())
    print(f"\n{level.capitalize()}-level data statistics:")
    display(frame.describe())

In [None]:
# Event-level histograms (normalized)
# Each panel is described once as
#   (column, x-axis label, title, bar colour, use log y-axis?)
# and drawn by the loop below; panels fill the 2x3 grid row by row.
event_panels = [
    ('impact_parameter', 'Impact Parameter b (fm)', 'Impact Parameter Distribution', 'skyblue', False),
    ('n_participants', 'Number of Participants', 'Participants Distribution', 'lightcoral', True),
    ('n_collisions', 'Number of Collisions', 'Collisions Distribution', 'lightgreen', True),
    ('n_particles', 'Number of Particles', 'Particle Multiplicity per Event', 'gold', True),
    ('mean_pT', 'Mean pT (GeV/c)', 'Mean pT per Event', 'plum', False),
    ('event_plane_angle', 'Event Plane Angle (rad)', 'Event Plane Angle Distribution', 'orange', False),
]

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Event-Level Distributions (Normalized)', fontsize=16, fontweight='bold')

# Total number of events (not used by the density-normalized panels below).
n_events = len(events_df)

for ax, (column, x_label, panel_title, bar_color, log_y) in zip(axes.flat, event_panels):
    # density=True makes each histogram integrate to 1.
    ax.hist(events_df[column], bins=30, alpha=0.7, color=bar_color, edgecolor='black', density=True)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Probability Density')
    ax.set_title(panel_title)
    if log_y:
        ax.set_yscale('log')  # Log scale for better visibility
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Particle-level histograms (normalized per event)
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Particle-Level Distributions (Per Event)', fontsize=16, fontweight='bold')

# Get total number of events for normalization
n_events = len(events_df)

# Kinematic panels, each described as
#   (axes, column, x-axis label, y-axis label, title, bar colour, use log y-axis?)
kinematic_panels = [
    (axes[0, 0], 'pT', 'pT (GeV/c)', 'Particles per Event per GeV/c',
     'Transverse Momentum Distribution', 'steelblue', True),
    (axes[0, 1], 'eta', 'η (pseudorapidity)', 'Particles per Event per η unit',
     'Pseudorapidity Distribution', 'forestgreen', False),
    (axes[1, 0], 'phi', 'φ (radians)', 'Particles per Event per radian',
     'Azimuthal Angle Distribution', 'crimson', False),
]

for ax, column, x_label, y_label, panel_title, bar_color, log_y in kinematic_panels:
    # Bin the data directly instead of drawing a throw-away histogram and
    # clearing the axes. NaNs are dropped because np.histogram cannot
    # auto-detect a range for data that contains them.
    values = particles_df[column].dropna().to_numpy()
    counts, bins = np.histogram(values, bins=50)
    # Normalize by number of events and by bin width
    bin_width = bins[1] - bins[0]
    normalized_counts = counts / n_events / bin_width
    # bins[:-1] holds the LEFT bin edges, so the bars must be edge-aligned;
    # the default align='center' would shift every bar half a bin to the left.
    ax.bar(bins[:-1], normalized_counts, width=bin_width, align='edge',
           alpha=0.7, color=bar_color, edgecolor='black')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    if log_y:
        ax.set_yscale('log')  # Log scale for better visibility
    ax.grid(True, alpha=0.3)

# Particle ID distribution (most common particles) - normalized per event
# The bars are categorical (one per particle ID), drawn at integer positions
# and labelled with the ID values.
particle_counts = particles_df['particle_id'].value_counts().head(10)
normalized_counts = particle_counts / n_events
id_positions = range(len(normalized_counts))
axes[1, 1].bar(id_positions, normalized_counts.values, alpha=0.7, color='purple')
axes[1, 1].set_xlabel('Particle ID (PDG)')
axes[1, 1].set_ylabel('Particles per Event')
axes[1, 1].set_title('Most Common Particle Types (Per Event)')
axes[1, 1].set_xticks(id_positions)
axes[1, 1].set_xticklabels(normalized_counts.index, rotation=45)
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print the most common particles (per event)
print("Most common particles (per event):")
for pid, count in particle_counts.head().items():
    avg_per_event = count / n_events
    print(f"  PDG {pid}: {avg_per_event:.3f} particles per event")