In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from astropy.io import fits
from astropy.table import Table
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

from exod.utils.path import data_raw, data_processed

# Load the filtered event list and convert it to a pandas DataFrame.
event_file = '../data/processed/0001730201/P0001730201PNU002PIEVLI0000_FILT.fits'

# Read the instrument keyword from the primary header inside a context manager
# so the FITS file handle is closed again straight away.
with fits.open(event_file) as hdul:
    instrument = hdul[0].header['INSTRUME']  # ['EMOS1', 'EMOS2', 'EPN']

# The event table itself is read from HDU 1.
tab = Table.read(event_file, hdu=1)
df  = tab.to_pandas()
df

In [None]:
# Keep the events of a single CCD and only the columns used further down.
# The explicit .copy() makes `df` an independent frame, so the column
# assignment below does not write into a slice of the full event table.
CCD_NUMBER = 4
df = df.loc[df['CCDNR'] == CCD_NUMBER, ['TIME', 'RAWX', 'RAWY', 'PI']].copy()

# Shift the time axis so the first selected event is at TIME == 0.
df['TIME'] = df['TIME'] - df['TIME'].min()
df

In [None]:
# Binning configuration.
t_bin_size = 20    # width of one time bin, in units of the TIME column
box_size   = 3     # side length of one spatial box, in raw pixels
x_max = 69         # upper RAWX limit of the box grid (also the image width used later)
y_max = 203        # upper RAWY limit of the box grid (also the image height used later)

t_0, t_f = df['TIME'].min(), df['TIME'].max()

# Bin edges. np.arange excludes the stop value, so one extra bin width is added
# to make sure the last edge is not below the largest value to be binned.
t_bins = np.arange(t_0, t_f + t_bin_size, t_bin_size)
x_bins = np.arange(0, x_max + box_size, box_size)
y_bins = np.arange(0, y_max + box_size, box_size)

# k + 1 edges delimit k bins, so the number of bins is one less than the
# number of edges (the previous version counted the edges).
N_t_bins = len(t_bins) - 1
N_y_bins = len(y_bins) - 1
N_x_bins = len(x_bins) - 1

# Total number of (time, y, x) cells in the binned data cube.
cube_size = N_t_bins * N_y_bins * N_x_bins
print(N_t_bins, N_y_bins, N_x_bins, cube_size)


# Assign every photon to a spatial box and to a time bin.
# DataFrame.assign returns a new frame, so nothing is written into a slice of
# an earlier DataFrame (avoids chained-assignment warnings).
#  - RAWX_GROUP / RAWY_GROUP: right-closed intervals (lo, hi] from pd.cut's default
#  - XY_BOX: string key "<x interval>_<y interval>" identifying the box
#  - TIME_BIN: left-closed intervals [lo, hi) because right=False
df = df.assign(
    RAWX_GROUP=pd.cut(df['RAWX'], bins=x_bins),
    RAWY_GROUP=pd.cut(df['RAWY'], bins=y_bins),
    XY_BOX=lambda d: d['RAWX_GROUP'].astype(str) + '_' + d['RAWY_GROUP'].astype(str),
    TIME_BIN=pd.cut(df['TIME'], bins=t_bins, right=False),
)
df


In [None]:
# Drop photons that fall outside the spatial grid on either axis. Filtering on
# RAWX only would leave "<x interval>_nan" box keys behind, which the later
# regex split of XY_BOX cannot parse into integers.
# .copy() gives an independent frame so later column assignments are safe.
df = df.dropna(subset=['RAWX_GROUP', 'RAWY_GROUP']).copy()
df

In [None]:
# NOTE(review): `box_data` is only assigned inside the per-box DBSCAN loop in a
# later cell, so on a fresh kernel (Restart & Run All) this cell raises NameError.
box_data

In [None]:
from tqdm import tqdm

In [None]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Cluster the photon arrival times independently inside every spatial box.
#
# The rows of each box are located once via groupby (row positions per key)
# instead of re-scanning the whole frame twice per box, and the labels are
# collected in one array that is attached to the frame in a single step.

# Row positions of the events belonging to each box, in original row order.
box_positions = df.groupby('XY_BOX', sort=False).indices

# One label per event; every event belongs to exactly one box, so every entry
# is overwritten below. Labels are stored as integers.
cluster_labels = np.full(len(df), -1, dtype=int)

for box, positions in tqdm(box_positions.items()):
    # Arrival times of the photons in the current box
    box_data = df.iloc[positions][['TIME']]

    # Standardize per box, so eps below is in units of this box's own spread
    box_data_scaled = StandardScaler().fit_transform(box_data)

    # DBSCAN labels clusters 0, 1, 2, ... and marks noise as -1
    dbscan = DBSCAN(eps=0.05, min_samples=10)
    cluster_labels[positions] = dbscan.fit_predict(box_data_scaled)

# Attach the labels; assign() returns a new frame rather than writing into a slice.
df = df.assign(CLUSTER_LABEL=cluster_labels)

df[['XY_BOX', 'TIME', 'CLUSTER_LABEL']]

In [None]:
# Number of events carrying each cluster label.
df.loc[:, 'CLUSTER_LABEL'].value_counts()

In [None]:
# Detector-plane view: every event as a faint dot, with the events of
# cluster labels 1..6 over-plotted. Labels 0 and -1 are not drawn by this loop.
fig, ax = plt.subplots(figsize=(5, 10))
ax.scatter(df['RAWX'], df['RAWY'], s=0.001)

for label in range(1, 7):
    members = df.loc[df['CLUSTER_LABEL'].eq(label)]
    ax.scatter(members['RAWX'], members['RAWY'], marker='+', label=f'{label}')

ax.legend()
plt.show()


In [None]:
from mpl_toolkits.mplot3d import Axes3D

# (RAWX, RAWY, TIME) view of the events with cluster labels 1..6.
# Labels 0 and -1 are not drawn by this loop.
fig, ax = plt.subplots(figsize=(20, 10), subplot_kw={'projection': '3d'})

for label in range(1, 7):
    members = df.loc[df['CLUSTER_LABEL'].eq(label)]
    ax.scatter(
        members['RAWX'], members['RAWY'], members['TIME'],
        marker='.', label=f'{label}', s=3,
    )

ax.set_xlabel('RAWX')
ax.set_ylabel('RAWY')
ax.set_zlabel('TIME')
ax.legend()
plt.show()


In [None]:
# Photon count for every (time bin, box) combination.
# observed=True keeps only combinations that actually occur in the data;
# observed=False would also list the empty ones.
# The resulting 'PI' column holds the number of events, not an energy.
group_keys = ['TIME_BIN', 'XY_BOX']
df_sub = df.loc[:, group_keys + ['PI']]
box_counts = (
    df_sub
    .groupby(group_keys, observed=True)['PI']
    .count()
    .reset_index()
)
box_counts

In [None]:
# Join the per-(time bin, box) counts onto the event table. Both frames have a
# 'PI' column, so the event value becomes 'PI_original' and the count 'PI_count'.
# NOTE(review): 'PI_count' is dropped immediately, so the merged-in counts are
# not kept in result_df - confirm this is intended.
result_df = (
    df
    .merge(box_counts, on=['TIME_BIN', 'XY_BOX'], how='right', suffixes=('_original', '_count'))
    .drop(columns=['PI_count'])
)
result_df

In [None]:
# Recover the integer box edges from the XY_BOX key of every (time bin, box)
# row; the four capture groups are x-low, x-high, y-low, y-high.
box_edge_pattern = r'\((\d+), (\d+)\]\_\((\d+), (\d+)\]'
edge_names = {0: 'X_LO', 1: 'X_HI', 2: 'Y_LO', 3: 'Y_HI'}

box_counts_split = (
    box_counts['XY_BOX']
    .str.extract(box_edge_pattern)
    .astype(int)
    .rename(columns=edge_names)
    .assign(VAL=box_counts['PI'])  # number of detected photons in the box / time bin
)
box_counts_split

In [None]:

# Build one (y_max, x_max) count image per time bin and stack them into a cube.
#
# The rows of each time bin are obtained from a single groupby instead of a
# full boolean scan per bin; box_counts_split shares its index with box_counts,
# so grouping by the TIME_BIN column of box_counts lines up row for row.
# Only time bins present in box_counts get a frame.
image_size = (y_max, x_max)  # loop-invariant, so defined once

image_arrays = []
frames_by_time_bin = box_counts_split.groupby(box_counts['TIME_BIN'], observed=True, sort=False)
for time_bin, box_counts_time_bin in frames_by_time_bin:
    image_array = np.zeros(image_size, dtype=int)

    # Paint every box of this time bin with its photon count.
    for box in box_counts_time_bin.itertuples(index=False):
        image_array[box.Y_LO:box.Y_HI, box.X_LO:box.X_HI] = box.VAL

    image_arrays.append(image_array)

# Stack to shape (n_frames, y_max, x_max)
image_arrays = np.array(image_arrays)

print(f'Number of image frames={len(image_arrays)}')


# Per-pixel maximum, minimum and median along the time axis of the image cube.
c_max    = image_arrays.max(axis=0)
c_min    = image_arrays.min(axis=0)
c_median = np.median(image_arrays, axis=0)

# Median with zeros replaced by one.
# NOTE(review): not used by anything in this cell - looks like a leftover from
# a normalisation step; confirm whether V was meant to be divided by it.
c_median_nonzero = np.where(c_median == 0, 1, c_median)

# Variability: the larger of the upward and downward excursion from the median.
excess_above = c_max - c_median
excess_below = c_median - c_min
V = np.maximum(excess_above, excess_below)

plt.imshow(V, interpolation='none')
plt.show()



In [None]:
# Zero-filled integer accumulators with the detector image shape (rows=y, cols=x).
image_size = (y_max, x_max)
V_array    = np.zeros(image_size, dtype=int)
hit_array  = np.zeros_like(V_array)

In [None]:
# Accumulate over all (time bin, box) rows:
#   V_array   += photon count of the row (VAL)
#   hit_array += 1, i.e. the number of time bins in which the box had events
# Both arrays are reset in place first, so re-running this cell does not
# double-count on top of the previous run.
V_array[:] = 0
hit_array[:] = 0

for box in box_counts_split.itertuples(index=False):
    rows = slice(box.Y_LO, box.Y_HI)
    cols = slice(box.X_LO, box.X_HI)
    V_array[rows, cols] += box.VAL
    hit_array[rows, cols] += 1
    

In [None]:
from matplotlib.colors import LogNorm
# Side-by-side summary images on a logarithmic colour scale.
fig, ax = plt.subplots(1, 2)

# V_array sums VAL, which is the photon count per (time bin, box) - these are
# counts, not energies, so the panel is titled accordingly.
ax[0].set_title('Summed counts')
m1 = ax[0].imshow(V_array, norm=LogNorm(), interpolation='none', origin='lower', cmap='hot')
fig.colorbar(m1, ax=ax[0])

# hit_array is incremented once per (time bin, box) row, i.e. it holds the
# number of time bins in which the box contained at least one event.
ax[1].set_title('Time bins with events')
m2 = ax[1].imshow(hit_array, norm=LogNorm(), interpolation='none', origin='lower', cmap='hot')
fig.colorbar(m2, ax=ax[1])

plt.show()


In [None]:
# Inspect the event table again, now including the binning and CLUSTER_LABEL columns.
df

In [None]:
# NOTE(review): same bare `df` display as the cell directly above; nothing
# modifies df in between, so this cell looks redundant.
df

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs

# Toy example: DBSCAN on 1D data made of two Gaussian blobs plus uniform noise.
DEMO_SEED = 42

# Two clusters centred at 2 and 8
X, _ = make_blobs(n_samples=300, centers=[[2], [8]], cluster_std=1.0, random_state=DEMO_SEED)

# Uniform background points. Drawn from a seeded generator so the whole demo is
# reproducible (the blobs were already seeded, the noise previously was not).
noise_rng = np.random.default_rng(DEMO_SEED)
noise = noise_rng.uniform(low=0, high=12, size=(30, 1))
X = np.concatenate([X, noise])

# Perform DBSCAN clustering; label -1 marks points classified as noise
dbscan = DBSCAN(eps=0.5, min_samples=5)
labels = dbscan.fit_predict(X)

# Plot the original data
plt.scatter(X, np.zeros_like(X), label='Original Data')

# Over-plot the points of every label
unique_labels = np.unique(labels)
for label in unique_labels:
    members = X[labels == label]
    if label == -1:
        plt.scatter(members, np.zeros_like(members), label='Noise', marker='x', c='black')
    else:
        plt.scatter(members, np.zeros_like(members), label=f'Cluster {label + 1}')

plt.title('DBSCAN Clustering of 1D Data')
plt.xlabel('Data Points')
plt.legend()
plt.show()


In [None]:
# NOTE(review): `poi_idx` is assigned in the following cell, so on a fresh
# kernel (Restart & Run All) this cell raises NameError.
poi_idx

In [None]:
# Simulate background photon arrival times: one Poisson draw per frame, and
# every frame with at least one count contributes one arrival time.
POISSON_SEED = 42
FRAME_TIME = 73.4E-3  # time per frame; presumably the EPIC-pn frame time in seconds - TODO confirm

# Seeded generator so the simulated background is reproducible between runs.
poi_rng  = np.random.default_rng(POISSON_SEED)
poi_samp = poi_rng.poisson(lam=0.01, size=100000)

# Indices of the frames that contain at least one count
poi_idx  = np.flatnonzero(poi_samp > 0)

# Frame index -> arrival time, as a column vector of shape (n_events, 1)
poi_toa  = (poi_idx * FRAME_TIME).reshape(-1, 1)

In [None]:
# Two narrow bursts of arrival times (centred at 2000 and 4000) on top of the
# simulated Poisson background arrival times in `poi_toa`.
X, _ = make_blobs(n_samples=1000, centers=[[2000], [4000]], cluster_std=10)

# NOTE(review): `noise` is drawn but not used anywhere in this cell.
noise = np.random.uniform(low=0, high=25000, size=(10,1))
X_sum = np.concatenate([X, poi_toa])

fig, ax = plt.subplots()

# Combined data first, then the burst events on top, both at y = 0
ax.scatter(X_sum, np.zeros_like(X_sum), label='Data', marker='.')
ax.scatter(X, np.zeros_like(X), label='Burst', marker='.')

# NOTE(review): DBSCAN is fitted on the burst events `X` only, not on the
# combined `X_sum` - confirm which one was intended.
dbscan = DBSCAN(eps=0.1, min_samples=10)
labels = dbscan.fit_predict(X)

# One horizontal row per label: row i holds the points of the i-th unique label
unique_labels = np.unique(labels)
for i, label in enumerate(unique_labels):
    members = X[labels == label]
    row_height = np.full_like(members, i)
    if label == -1:
        scatter_style = dict(label='Noise', marker='x', c='black')
    else:
        scatter_style = dict(label=f'Cluster {label + 1}')
    ax.scatter(members, row_height, **scatter_style)

ax.set_xlabel('Time')
ax.legend()
plt.show()
