In [1]:
import polars as pl
import pandas as pd
import numpy as np
import os
import re
import random
from scipy.spatial import KDTree

# Settings
d_max = 1500.0  # Maximum separation for two detectors to count as neighbours (metres)
area = np.pi * 1.8 ** 2  # Detector area in m² (disc of radius 1.8)
pasta_base = 'tratados_auger'

# Load the Auger detector map (read as a headerless CSV: id, x, y) and index it spatially
detectores_df = pd.read_csv('map_auger.csv', header=None, names=['id', 'x', 'y'])
positions = detectores_df[['x', 'y']].to_numpy()
tree = KDTree(positions)  # Single global KD-tree shared by every simulation

# Triangulation check
def is_valid_simulation(detections):
    """Tell whether the triggered detectors contain a mutually-neighbouring triple.

    Relies on the module-level ``tree``, ``positions`` and ``d_max``.

    Args:
        detections: Boolean sequence, one entry per row of ``positions``;
            truthy entries mark detectors that fired.

    Returns:
        True if at least three fired detectors exist such that each one lies
        within ``d_max`` of the other two; False otherwise (including when
        fewer than three detectors fired).
    """
    hit = np.where(detections)[0]
    n_hit = len(hit)
    if n_hit < 3:
        return False

    # For every fired detector, the set of detector indices within d_max of it
    nearby = [set(tree.query_ball_point(positions[tank], d_max)) for tank in hit]

    for a in range(n_hit):
        near_a = nearby[a]
        for b in range(a + 1, n_hit):
            if hit[b] not in near_a:
                continue
            near_b = nearby[b]
            # A third fired detector close to both a and b closes the triangle
            if any(hit[c] in near_a and hit[c] in near_b
                   for c in range(b + 1, n_hit)):
                return True
    return False

# Extract composition and energy from a file name
def extract_info(filename):
    """Split a name like ``carbon3.16E18_auger.csv`` into composition and energy.

    The name must start with a run of ASCII letters (the composition)
    immediately followed by a number in decimal or scientific notation (the
    energy). Anything after the number is ignored.

    Args:
        filename: Base file name, with or without the ``_auger.csv`` suffix.

    Returns:
        ``(composition, energy)`` where ``composition`` is lower-cased and
        ``energy`` is the number text with its exponent marker upper-cased
        (``'1e18'`` -> ``'1E18'``), or ``(None, None)`` when the name does not
        follow the pattern.
    """
    suffix = '_auger.csv'
    # Strip the suffix only at the end, never from the middle of the name.
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename

    # Mantissa (digits with optional fraction) plus optional exponent. A bare
    # character class such as [0-9E.]+ would cut '1e18' down to '1'.
    match = re.match(
        r'([a-zA-Z]+)((?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)',
        stem,
    )
    if match is None:
        return None, None
    return match.group(1).lower(), match.group(2).upper()

# Process every '*_auger.csv' file found under pasta_base
dataframes = []
bin_width = 6.5  # Radial bin width (metres)
r_max = 6500.0  # Detectors at or beyond this radius share a single overflow bin
random_seed = 42  # Fixed seed: makes the smeared densities and the shuffle reproducible
rng = np.random.default_rng(random_seed)

# Detector binning does not depend on the file being read, so do it once.
detectores = detectores_df.copy()
detectores['r'] = np.sqrt(detectores['x']**2 + detectores['y']**2)
detectores['bin_center'] = (detectores['r'] // bin_width) * bin_width + bin_width/2
detectores.loc[detectores['r'] >= r_max, 'bin_center'] = r_max + bin_width/2
detector_cols = set(detectores.columns)

for root, dirs, files in os.walk(pasta_base):
    # os.walk yields entries in arbitrary order; sort so that the seeded
    # random stream is consumed in the same order on every run.
    dirs.sort()
    for arquivo in sorted(files):
        if not arquivo.endswith('_auger.csv'):
            continue

        caminho = os.path.join(root, arquivo)
        print(f"Processando: {arquivo}")

        # Composition and energy come from the file name
        composition, energy = extract_info(arquivo)
        if composition is None:
            print(f"  Formato inválido: {arquivo}")
            continue

        try:
            # Read densities and smear each one with Gaussian noise whose
            # standard deviation is the row's 'error' (mu + sigma * z).
            densidades_df = pd.read_csv(caminho)
            densidades_df['density_error'] = (
                densidades_df['density']
                + densidades_df['error'] * rng.standard_normal(len(densidades_df))
            )

            # One column per event ('eas'), one row per radial bin
            densidades_pivot = densidades_df.pivot(
                index='bin_center',
                columns='eas',
                values='density_error'
            ).reset_index()

            # Attach to each detector the density of the bin it falls in;
            # detectors whose bin is absent from the file get 0.
            merged_df = pd.merge(
                detectores,
                densidades_pivot,
                on='bin_center',
                how='left'
            ).fillna(0)

            eas_cols = [col for col in merged_df.columns if col not in detector_cols]

            # Particle count = density * detector area, for all events at once.
            # Shape: (number of detectors, number of events).
            particle_matrix = merged_df[eas_cols].to_numpy(dtype=float) * area
            # Kept as float so the output schema matches what downstream cells expect.
            detector_ids = merged_df['id'].to_numpy(dtype=float)

            ids_out, particles_out, sim_ids_out = [], [], []
            for col_idx, eas_col in enumerate(eas_cols):
                particles = particle_matrix[:, col_idx]
                detections = particles >= 1.0

                # Keep only events that pass the triangulation check
                if not is_valid_simulation(detections):
                    continue

                ids_out.append(detector_ids[detections])
                particles_out.append(particles[detections])
                # Use a plain Python scalar for the event label
                sim_label = eas_col.item() if isinstance(eas_col, np.generic) else eas_col
                sim_ids_out.extend([sim_label] * int(detections.sum()))

            # One polars frame per file, one row per (event, triggered detector)
            if sim_ids_out:
                n_rows = len(sim_ids_out)
                df_arquivo = pl.DataFrame({
                    'id': np.concatenate(ids_out),
                    'particles': np.concatenate(particles_out),
                    'simulation_id': sim_ids_out,
                    'composition': [composition] * n_rows,
                    'energy': [energy] * n_rows,
                })
                dataframes.append(df_arquivo)
            else:
                print("  Nenhum evento válido encontrado")

        except Exception as e:
            # Best effort: report the failure and move on to the next file
            print(f"  Erro no processamento: {str(e)}")

# Combine all files into one shuffled frame
if dataframes:
    df_final = pl.concat(dataframes)
    df_final = df_final.sample(fraction=1.0, shuffle=True, seed=random_seed)
    print("\nDataFrame final:")
    print(df_final)
    print(f"Total de registros: {len(df_final)}")
else:
    print("Nenhum dado válido foi processado")

Processando: carbon1E18_auger.csv
Processando: carbon1E19_auger.csv
Processando: carbon1E20_auger.csv
Processando: carbon3.16E18_auger.csv
Processando: carbon3.16E19_auger.csv
Processando: iron1E18_auger.csv
Processando: iron1E19_auger.csv
Processando: iron1E20_auger.csv
Processando: iron3.16E18_auger.csv
Processando: iron3.16E19_auger.csv
Processando: nitrogen1E18_auger.csv
Processando: nitrogen1E19_auger.csv
Processando: nitrogen1E20_auger.csv
Processando: nitrogen3.16E18_auger.csv
Processando: nitrogen3.16E19_auger.csv
Processando: oxygen1E18_auger.csv
Processando: oxygen1E19_auger.csv
Processando: oxygen1E20_auger.csv
Processando: oxygen3.16E18_auger.csv
Processando: oxygen3.16E19_auger.csv
Processando: photon1E18_auger.csv
Processando: photon1E19_auger.csv
Processando: photon1E20_auger.csv
Processando: photon3.16E18_auger.csv
Processando: photon3.16E19_auger.csv
Processando: proton1E18_auger.csv
Processando: proton1E19_auger.csv
Processando: proton1E20_auger.csv
Processando: proto

In [2]:
# Show the column names of the combined frame
df_final.columns

['id', 'particles', 'simulation_id', 'composition', 'energy']

In [3]:
# Rows with simulation_id 83, composition 'carbon' and energy '1E19'
df_final.filter(
    pl.col('simulation_id') == 83,
    pl.col('composition') == 'carbon',
    pl.col('energy') == '1E19',
)

id,particles,simulation_id,composition,energy
f64,f64,i64,str,str
829.0,1.326103,83,"""carbon""","""1E19"""
837.0,4.021297,83,"""carbon""","""1E19"""
838.0,3.0991e7,83,"""carbon""","""1E19"""
826.0,12.165836,83,"""carbon""","""1E19"""
1871.0,15.921578,83,"""carbon""","""1E19"""
…,…,…,…,…
1447.0,17.34271,83,"""carbon""","""1E19"""
1429.0,3.067517,83,"""carbon""","""1E19"""
1233.0,13.641252,83,"""carbon""","""1E19"""
1436.0,3.537144,83,"""carbon""","""1E19"""


In [4]:
# Rows with simulation_id 87, composition 'iron' and energy '3.16E18'
df_final.filter(
    pl.col('simulation_id') == 87,
    pl.col('composition') == 'iron',
    pl.col('energy') == '3.16E18',
)

id,particles,simulation_id,composition,energy
f64,f64,i64,str,str
1871.0,10.114342,87,"""iron""","""3.16E18"""
838.0,5093600.0,87,"""iron""","""3.16E18"""
805.0,5.615584,87,"""iron""","""3.16E18"""
1233.0,4.459506,87,"""iron""","""3.16E18"""
1447.0,4.431907,87,"""iron""","""3.16E18"""
809.0,1.101454,87,"""iron""","""3.16E18"""
826.0,2.122492,87,"""iron""","""3.16E18"""
1445.0,5.205993,87,"""iron""","""3.16E18"""


In [5]:
# Columns that together identify one simulated event
event_keys = ["simulation_id", "composition", "energy"]

# Event with the largest number of rows (triggered detectors) in df_final.
# group_by does not guarantee row order, so ties on "count" are broken
# explicitly by the event keys; otherwise the chosen reference event, and
# with it the whole output layout, could change from run to run.
max_detectors_event = (
    df_final
    .group_by(event_keys)
    .agg(pl.len().alias("count"))
    .sort(["count", *event_keys], descending=[True, False, False, False])
    .head(1)
)

# Detector ids present in that reference event
reference_detectors = df_final.filter(
    (pl.col("simulation_id") == max_detectors_event["simulation_id"][0]) &
    (pl.col("composition") == max_detectors_event["composition"][0]) &
    (pl.col("energy") == max_detectors_event["energy"][0])
).select("id").unique()

# Every event paired with every reference detector
all_combinations = (
    df_final.select(event_keys).unique()
    .join(reference_detectors, how="cross")
)

# Fixed layout: each event gets one row per reference detector, with 0.0
# particles where that detector has no row in df_final for the event.
# NOTE: rows of df_final whose detector id is not in reference_detectors
# do not appear in the result.
df_fixed = (
    all_combinations
    .join(
        df_final,
        on=[*event_keys, "id"],
        how="left"
    )
    .with_columns(
        pl.col("particles").fill_null(0.0),
        pl.col("id").cast(pl.Float64)
    )
    .sort([*event_keys, "id"])
)

print(df_fixed)

shape: (115_632, 5)
┌───────────────┬─────────────┬────────┬────────┬───────────┐
│ simulation_id ┆ composition ┆ energy ┆ id     ┆ particles │
│ ---           ┆ ---         ┆ ---    ┆ ---    ┆ ---       │
│ i64           ┆ str         ┆ str    ┆ f64    ┆ f64       │
╞═══════════════╪═════════════╪════════╪════════╪═══════════╡
│ 1             ┆ carbon      ┆ 1E18   ┆ 503.0  ┆ 0.0       │
│ 1             ┆ carbon      ┆ 1E18   ┆ 506.0  ┆ 0.0       │
│ 1             ┆ carbon      ┆ 1E18   ┆ 509.0  ┆ 0.0       │
│ 1             ┆ carbon      ┆ 1E18   ┆ 510.0  ┆ 0.0       │
│ 1             ┆ carbon      ┆ 1E18   ┆ 511.0  ┆ 0.0       │
│ …             ┆ …           ┆ …      ┆ …      ┆ …         │
│ 500           ┆ oxygen      ┆ 1E18   ┆ 1526.0 ┆ 0.0       │
│ 500           ┆ oxygen      ┆ 1E18   ┆ 1717.0 ┆ 0.0       │
│ 500           ┆ oxygen      ┆ 1E18   ┆ 1821.0 ┆ 0.0       │
│ 500           ┆ oxygen      ┆ 1E18   ┆ 1828.0 ┆ 0.0       │
│ 500           ┆ oxygen      ┆ 1E18   ┆ 1871.0 ┆ 

In [6]:
# Rows of df_fixed for simulation_id 3, composition 'nitrogen', energy '1E19'
print(
    df_fixed.filter(
        pl.col('simulation_id') == 3,
        pl.col('composition') == 'nitrogen',
        pl.col('energy') == '1E19',
    )
)

shape: (44, 5)
┌───────────────┬─────────────┬────────┬────────┬───────────┐
│ simulation_id ┆ composition ┆ energy ┆ id     ┆ particles │
│ ---           ┆ ---         ┆ ---    ┆ ---    ┆ ---       │
│ i64           ┆ str         ┆ str    ┆ f64    ┆ f64       │
╞═══════════════╪═════════════╪════════╪════════╪═══════════╡
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 503.0  ┆ 0.0       │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 506.0  ┆ 0.0       │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 509.0  ┆ 0.0       │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 510.0  ┆ 0.0       │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 511.0  ┆ 0.0       │
│ …             ┆ …           ┆ …      ┆ …      ┆ …         │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 1526.0 ┆ 0.0       │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 1717.0 ┆ 0.0       │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 1821.0 ┆ 0.0       │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 1828.0 ┆ 0.0       │
│ 3             ┆ nitrogen    ┆ 1E19   ┆ 1871.0 ┆ 3.922

In [7]:
# Write the fixed-layout table to a CSV file (path is relative to the working directory)
df_fixed.write_csv('data_NN_auger.csv')