# Jet pT Cross-Section Analysis

This notebook loads the PYTHIA8 simulation data and creates a jet pT cross-section spectrum using:
- JSON file with event normalization information
- Parquet file with jet data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import seaborn as sns

# Plot styling: start from matplotlib's 'default' style, select the "husl"
# seaborn palette, then apply the notebook-wide figure size and font size.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (10, 7),
    'font.size': 12,
})

## Load Data Files

Load the normalization data from JSON and jet data from parquet file.

In [None]:
# --- Event normalization (JSON) ---------------------------------------------
# The JSON is read into `norm_data`; the fields used below are
# 'n_accepted', 'sigma_gen' and 'sum_weights'.
norm_path = 'pythia8_simple_eec_output.json'
with open(norm_path, 'r') as norm_file:
    norm_data = json.load(norm_file)

norm_report = [
    "Normalization data:",
    f"Number of accepted events: {norm_data['n_accepted']}",
    f"Generated cross-section: {norm_data['sigma_gen']:.6f} mb",
    f"Sum of weights: {norm_data['sum_weights']:.1f}",
]
print("\n".join(norm_report))

# --- Per-jet table (parquet) ------------------------------------------------
jets_path = 'pythia8_simple_eec_output.parquet'
df_jets = pd.read_parquet(jets_path)

# Quick sanity summary: number of rows, available columns, and the pT span.
jet_pt = df_jets['pt']
jets_report = [
    f"\nJet data loaded:",
    f"Number of jets: {len(df_jets)}",
    f"Columns: {list(df_jets.columns)}",
    f"pT range: {jet_pt.min():.2f} - {jet_pt.max():.2f} GeV",
]
print("\n".join(jets_report))

## Create Jet pT Cross-Section Spectrum

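Calculate the differential cross-section $\frac{d^2\sigma}{dp_T\,d\eta}$ as a function of jet $p_T$: the weighted jet count in each $p_T$ bin is divided by the bin width and by $\Delta\eta$, and scaled by $\sigma_{\mathrm{gen}} / \sum w$.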

In [None]:
# Define pT binning (logarithmic spacing works well for jet pT spectra)
pt_min, pt_max = 20, 100  # GeV
n_bins = 20
pt_bins = np.logspace(np.log10(pt_min), np.log10(pt_max), n_bins + 1)

# Weighted histogram of jet pT: every jet enters with its 'ev_weight'.
# Jets with pT outside [pt_min, pt_max] fall outside the bins and are dropped.
counts, bin_edges = np.histogram(df_jets['pt'], bins=pt_bins, weights=df_jets['ev_weight'])

# Sum of squared weights per bin: for a weighted histogram the statistical
# variance of a bin content is sum(w^2), so sqrt(sumw2) is its uncertainty.
sumw2, _ = np.histogram(df_jets['pt'], bins=pt_bins, weights=df_jets['ev_weight'] ** 2)

# Bin centers (arithmetic midpoints of the edges) and widths
bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
bin_widths = bin_edges[1:] - bin_edges[:-1]

# Cross-section normalization taken from the JSON file
sigma_gen = norm_data['sigma_gen']  # mb
sum_weights = norm_data['sum_weights']
if sum_weights == 0:
    # Fail with an explicit message instead of a bare ZeroDivisionError below.
    raise ValueError("norm_data['sum_weights'] is zero; cannot normalize the cross-section")

# Differential cross-section: d²σ/dpT/dη
# Assumes the jets were selected with |η| < 1.0 upstream, so Δη = 2.0
# (NOTE(review): confirm against the generation script).
delta_eta = 2.0
cross_section = (counts / bin_widths) * (sigma_gen / sum_weights) / delta_eta  # mb/GeV
# Per-bin statistical uncertainty, normalized exactly like cross_section.
cross_section_err = (np.sqrt(sumw2) / bin_widths) * (sigma_gen / sum_weights) / delta_eta  # mb/GeV

print(f"Calculated cross-section for {len(counts)} pT bins")
populated = cross_section[cross_section > 0]
if populated.size:
    print(f"Cross-section range: {populated.min():.2e} - {cross_section.max():.2e} mb/GeV")
else:
    # .min() on an empty selection raises ValueError; report the situation instead.
    print(f"No jets with positive weight between {pt_min} and {pt_max} GeV; cross-section is zero in every bin")

## Plot Cross-Section Spectrum

Create publication-quality plots of the jet pT cross-section.

In [None]:
# Two stacked panels sharing the same data: a tall log-log spectrum on top
# and a shorter panel with a linear y-axis underneath.
fig, (ax1, ax2) = plt.subplots(
    2, 1,
    figsize=(12, 10),
    gridspec_kw={'height_ratios': [3, 1]},
)

# Only bins with a positive cross-section are drawn.
mask = cross_section > 0
pt_points = bin_centers[mask]
xsec_points = cross_section[mask]
half_widths = bin_widths[mask] / 2
xsec_label = r'$\frac{d^2\sigma}{dp_T d\eta}$ [mb/GeV]'

# --- Top panel: log-log spectrum, horizontal bars span each bin's half-width
ax1.errorbar(
    pt_points, xsec_points,
    xerr=half_widths,
    fmt='o-', capsize=3, markersize=6, linewidth=2,
    label='PYTHIA8 Jets (anti-kT R=0.4)',
)
ax1.set_yscale('log')
ax1.set_xscale('log')
ax1.set_ylabel(xsec_label, fontsize=14)
ax1.set_title('Jet pT Cross-Section Spectrum', fontsize=16, fontweight='bold')
ax1.grid(True, alpha=0.3)
ax1.legend(fontsize=12)

# Text box with the simulation parameters, anchored to the top-left corner
# of the upper panel (axes-fraction coordinates).
textstr = '\n'.join([
    f'Events: {norm_data["n_accepted"]}',
    f'σ_gen = {sigma_gen:.3f} mb',
    '|η| < 1.0',
])
props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.8}
ax1.text(
    0.05, 0.95, textstr,
    transform=ax1.transAxes,
    fontsize=10,
    verticalalignment='top',
    bbox=props,
)

# --- Bottom panel: same points, linear y-axis, log x-axis
ax2.plot(pt_points, xsec_points, 'o-', markersize=6, linewidth=2)
ax2.set_xlabel(r'Jet $p_T$ [GeV]', fontsize=14)
ax2.set_ylabel(xsec_label, fontsize=12)
ax2.grid(True, alpha=0.3)
ax2.set_xscale('log')

plt.tight_layout()
plt.show()

## Additional Analysis

Explore other jet properties and their correlations.

In [None]:
# 2x2 overview of jet properties: pT and η histograms on the top row,
# mass-vs-pT and angularity-vs-pT scatter plots on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(ax1, ax2), (ax3, ax4) = axes

# Top row: raw (unweighted) jet counts in 30 bins per histogram.
hist_panels = [
    (ax1, 'pt', 'Jet pT [GeV]', 'Jet pT Distribution', {}),
    (ax2, 'eta', 'Jet η', 'Jet η Distribution', {'color': 'orange'}),
]
for panel, column, xlabel, title, extra_style in hist_panels:
    panel.hist(df_jets[column], bins=30, alpha=0.7, edgecolor='black', **extra_style)
    panel.set(xlabel=xlabel, ylabel='Number of Jets', title=title)
    panel.grid(True, alpha=0.3)

# Bottom-left: jet mass against pT, one point per jet.
scatter = ax3.scatter(df_jets['pt'], df_jets['m'], alpha=0.6, s=20)
ax3.set(xlabel='Jet pT [GeV]', ylabel='Jet Mass [GeV]', title='Jet Mass vs pT')
ax3.grid(True, alpha=0.3)

# Bottom-right: the three angularity columns overlaid against pT.
angularity_series = (
    ('angk1a1', 'k=1, α=1'),
    ('angk1a2', 'k=1, α=2'),
    ('angk1a3', 'k=1, α=3'),
)
for column, legend_label in angularity_series:
    ax4.scatter(df_jets['pt'], df_jets[column], alpha=0.6, s=20, label=legend_label)
ax4.set(xlabel='Jet pT [GeV]', ylabel='Angularity', title='Angularity vs pT')
ax4.legend()
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Summary statistics
# These are plain per-jet means and standard deviations; 'ev_weight' is not
# applied here.
print("=== Jet Analysis Summary ===")
print(f"Total number of jets: {len(df_jets)}")
print(f"Mean jet pT: {df_jets['pt'].mean():.2f} ± {df_jets['pt'].std():.2f} GeV")
print(f"Mean jet mass: {df_jets['m'].mean():.2f} ± {df_jets['m'].std():.2f} GeV")
print(f"Mean jet η: {df_jets['eta'].mean():.3f} ± {df_jets['eta'].std():.3f}")

# Cross-section integration
# Multiplying back by bin width and Δη undoes the per-bin normalization, so
# the total is (sum of weighted jet counts inside the histogram bins)
# * sigma_gen / sum_weights. Its ratio to sigma_gen is therefore the weighted
# number of histogrammed jets per unit of sum_weights (presumably the sum of
# event weights — verify against the generation script). It equals 1 only if
# every event contributes exactly one jet inside the binned pT range, so it
# is reported as a jets-per-event figure rather than as a closure test.
total_integrated_xsec = np.sum(cross_section * bin_widths) * delta_eta
# Avoid a ZeroDivisionError if the normalization file carries sigma_gen == 0.
jets_per_event = total_integrated_xsec / sigma_gen if sigma_gen else float('nan')
print(f"\nTotal integrated cross-section: {total_integrated_xsec:.4f} mb")
print(f"Generated cross-section: {sigma_gen:.4f} mb")
print(f"Ratio integrated/generated (weighted jets per event in the binned pT range): {jets_per_event:.4f}")

# Save results to CSV for further analysis (one row per pT bin)
output_csv = 'jet_pt_cross_section.csv'
results_df = pd.DataFrame({
    'pt_center': bin_centers,
    'pt_width': bin_widths,
    'cross_section': cross_section,
    'counts': counts
})

results_df.to_csv(output_csv, index=False)
print(f"\nResults saved to '{output_csv}'")