## Error Detection

### Importing

In [194]:
import polars as pl
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

plt.style.use("fivethirtyeight")

### Data File

In [195]:
df = pl.read_csv(
    "sept_17_21h44_first.csv",
    schema_overrides={
        "trigger_id": pl.UInt16,
        "trigger_clk": pl.UInt64,
        "integer_data": pl.UInt32,
        "idk": pl.UInt32,
        "sensor_1-2": pl.Utf8,
        "sensor_3-4": pl.Utf8,
        "sensor_5-6": pl.Utf8,
        "internal_trigger": pl.Boolean,
        "veto_in": pl.Boolean
    }
)

binary_cols = ["sensor_1-2", "sensor_3-4", "sensor_5-6"]

df = df.with_columns([
    pl.col(col)
    .map_elements(lambda b: int(b, 2) if b is not None else None, return_dtype=pl.UInt8)
    .alias(col)
    for col in binary_cols
])

df = df.drop("integer_data").drop("idk")
print(df)

shape: (15_527, 7)
┌────────────┬─────────────┬────────────┬────────────┬────────────┬──────────────────┬─────────┐
│ trigger_id ┆ trigger_clk ┆ sensor_1-2 ┆ sensor_3-4 ┆ sensor_5-6 ┆ internal_trigger ┆ veto_in │
│ ---        ┆ ---         ┆ ---        ┆ ---        ┆ ---        ┆ ---              ┆ ---     │
│ u16        ┆ u64         ┆ u8         ┆ u8         ┆ u8         ┆ bool             ┆ bool    │
╞════════════╪═════════════╪════════════╪════════════╪════════════╪══════════════════╪═════════╡
│ 0          ┆ 1882168984  ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ 0          ┆ 1882176984  ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ 0          ┆ 1072996353  ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ 0          ┆ 1073038198  ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ 0          ┆ 1073168771  ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ …        

### Time Conversion

In [196]:
df = df.with_columns(
    (pl.col("trigger_clk") / 40000000).alias("trigger_clk")
)

#### Printing DataFrame

In [197]:
print(df)

shape: (15_527, 7)
┌────────────┬─────────────┬────────────┬────────────┬────────────┬──────────────────┬─────────┐
│ trigger_id ┆ trigger_clk ┆ sensor_1-2 ┆ sensor_3-4 ┆ sensor_5-6 ┆ internal_trigger ┆ veto_in │
│ ---        ┆ ---         ┆ ---        ┆ ---        ┆ ---        ┆ ---              ┆ ---     │
│ u16        ┆ f64         ┆ u8         ┆ u8         ┆ u8         ┆ bool             ┆ bool    │
╞════════════╪═════════════╪════════════╪════════════╪════════════╪══════════════════╪═════════╡
│ 0          ┆ 47.054225   ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ 0          ┆ 47.054425   ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ 0          ┆ 26.824909   ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ 0          ┆ 26.825955   ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ 0          ┆ 26.829219   ┆ 255        ┆ 255        ┆ 0          ┆ false            ┆ false   │
│ …        

### Trigger Data Errors

#### Sensor 1-2

In [198]:
""" td_df = df.with_columns(
    pl.col("sensor_1-2").cast(pl.UInt8).map_elements(
        lambda x: format(x, '08b'),
        return_dtype=pl.String
    ).alias("trigger_data_binary")
)

td_df = td_df.with_columns([
    pl.col("trigger_data_binary").str.slice(i*2, 2).map_elements(
        lambda b: int(b, 2), return_dtype=pl.UInt8)
        .alias(f"chunk_{i}")
        for i in range(4)])

td_df = td_df.filter(
    pl.struct(td_df.columns).map_elements(lambda row: 0 not in row and 3 not in row)
)
    
td_df = td_df.drop("trigger_data_binary").drop("sensor_1-2")

s12_df = td_df.select(
    [f"chunk_{i}" for i in range(4)]
)

print(s12_df) """

' td_df = df.with_columns(\n    pl.col("sensor_1-2").cast(pl.UInt8).map_elements(\n        lambda x: format(x, \'08b\'),\n        return_dtype=pl.String\n    ).alias("trigger_data_binary")\n)\n\ntd_df = td_df.with_columns([\n    pl.col("trigger_data_binary").str.slice(i*2, 2).map_elements(\n        lambda b: int(b, 2), return_dtype=pl.UInt8)\n        .alias(f"chunk_{i}")\n        for i in range(4)])\n\ntd_df = td_df.filter(\n    pl.struct(td_df.columns).map_elements(lambda row: 0 not in row and 3 not in row)\n)\n\ntd_df = td_df.drop("trigger_data_binary").drop("sensor_1-2")\n\ns12_df = td_df.select(\n    [f"chunk_{i}" for i in range(4)]\n)\n\nprint(s12_df) '

### Trigger Data

In [199]:
def extract_chunks(df, col_name, chunk_start_idx):
    df = df.with_columns(
        pl.col(col_name)
        .cast(pl.UInt8)
        .map_elements(lambda x: format(x, '08b'), return_dtype=pl.String)
        .alias(f"{col_name}_binary")
    )
    df = df.with_columns([
        pl.col(f"{col_name}_binary")
        .str.slice(i * 2, 2)
        .map_elements(lambda b: int(b, 2), return_dtype=pl.UInt8)
        .alias(f"chunk_{chunk_start_idx + i}")
        for i in range(4)
    ])
    df = df.drop(f"{col_name}_binary")
    return df

df = extract_chunks(df, "sensor_1-2", 0)
df = extract_chunks(df, "sensor_3-4", 4)
df = extract_chunks(df, "sensor_5-6", 8)

chunk_df = df.select([f"chunk_{i}" for i in range(12)])

df = df.drop([f"chunk_{i}" for i in range(12)])


#### Printing DataFrames

In [200]:
s12_df = chunk_df.select([f"chunk_{i}" for i in range(4)])
print(s12_df)

shape: (15_527, 4)
┌─────────┬─────────┬─────────┬─────────┐
│ chunk_0 ┆ chunk_1 ┆ chunk_2 ┆ chunk_3 │
│ ---     ┆ ---     ┆ ---     ┆ ---     │
│ u8      ┆ u8      ┆ u8      ┆ u8      │
╞═════════╪═════════╪═════════╪═════════╡
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ …       ┆ …       ┆ …       ┆ …       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
└─────────┴─────────┴─────────┴─────────┘


In [201]:
s34_df = chunk_df.select([f"chunk_{i}" for i in range(4,8)])
print(s34_df)

shape: (15_527, 4)
┌─────────┬─────────┬─────────┬─────────┐
│ chunk_4 ┆ chunk_5 ┆ chunk_6 ┆ chunk_7 │
│ ---     ┆ ---     ┆ ---     ┆ ---     │
│ u8      ┆ u8      ┆ u8      ┆ u8      │
╞═════════╪═════════╪═════════╪═════════╡
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ …       ┆ …       ┆ …       ┆ …       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
│ 3       ┆ 3       ┆ 3       ┆ 3       │
└─────────┴─────────┴─────────┴─────────┘


In [202]:
s56_df = chunk_df.select([f"chunk_{i}" for i in range(8,12)])
print(s56_df)

shape: (15_527, 4)
┌─────────┬─────────┬──────────┬──────────┐
│ chunk_8 ┆ chunk_9 ┆ chunk_10 ┆ chunk_11 │
│ ---     ┆ ---     ┆ ---      ┆ ---      │
│ u8      ┆ u8      ┆ u8       ┆ u8       │
╞═════════╪═════════╪══════════╪══════════╡
│ 0       ┆ 0       ┆ 0        ┆ 0        │
│ 0       ┆ 0       ┆ 0        ┆ 0        │
│ 0       ┆ 0       ┆ 0        ┆ 0        │
│ 0       ┆ 0       ┆ 0        ┆ 0        │
│ 0       ┆ 0       ┆ 0        ┆ 0        │
│ …       ┆ …       ┆ …        ┆ …        │
│ 0       ┆ 0       ┆ 0        ┆ 0        │
│ 0       ┆ 0       ┆ 0        ┆ 0        │
│ 0       ┆ 0       ┆ 0        ┆ 0        │
│ 0       ┆ 2       ┆ 0        ┆ 0        │
│ 0       ┆ 0       ┆ 0        ┆ 0        │
└─────────┴─────────┴──────────┴──────────┘


### Veto In Errors

In [203]:
vi_df = df.filter(pl.col("veto_in") == True)

#### Printing DataFrames

In [204]:
print(vi_df)

shape: (1_237, 7)
┌────────────┬─────────────┬────────────┬────────────┬────────────┬──────────────────┬─────────┐
│ trigger_id ┆ trigger_clk ┆ sensor_1-2 ┆ sensor_3-4 ┆ sensor_5-6 ┆ internal_trigger ┆ veto_in │
│ ---        ┆ ---         ┆ ---        ┆ ---        ┆ ---        ┆ ---              ┆ ---     │
│ u16        ┆ f64         ┆ u8         ┆ u8         ┆ u8         ┆ bool             ┆ bool    │
╞════════════╪═════════════╪════════════╪════════════╪════════════╪══════════════════╪═════════╡
│ 0          ┆ 27.304063   ┆ 255        ┆ 255        ┆ 1          ┆ false            ┆ true    │
│ 0          ┆ 27.475376   ┆ 255        ┆ 255        ┆ 2          ┆ false            ┆ true    │
│ 0          ┆ 27.63347    ┆ 255        ┆ 255        ┆ 128        ┆ false            ┆ true    │
│ 0          ┆ 27.671425   ┆ 255        ┆ 255        ┆ 128        ┆ false            ┆ true    │
│ 0          ┆ 28.262075   ┆ 255        ┆ 255        ┆ 32         ┆ false            ┆ true    │
│ …         

### Internal Trigger Errors

In [205]:
it_df = df.filter(pl.col("internal_trigger") == True)

#### Printing DataFrames

In [206]:
print(it_df)

shape: (0, 7)
┌────────────┬─────────────┬────────────┬────────────┬────────────┬──────────────────┬─────────┐
│ trigger_id ┆ trigger_clk ┆ sensor_1-2 ┆ sensor_3-4 ┆ sensor_5-6 ┆ internal_trigger ┆ veto_in │
│ ---        ┆ ---         ┆ ---        ┆ ---        ┆ ---        ┆ ---              ┆ ---     │
│ u16        ┆ f64         ┆ u8         ┆ u8         ┆ u8         ┆ bool             ┆ bool    │
╞════════════╪═════════════╪════════════╪════════════╪════════════╪══════════════════╪═════════╡
└────────────┴─────────────┴────────────┴────────────┴────────────┴──────────────────┴─────────┘


## Plotting

In [207]:
'''
-x = clk, y = faulty ids (tells us if there is an interval which issues happen)
-heat map (graph that changes color when get 0 or 2)
-3d gaussian distribution x = , y = , z = (tells us)
'''

'\n-x = clk, y = faulty ids (tells us if there is an interval which issues happen)\n-heat map (graph that changes color when get 0 or 2)\n-3d gaussian distribution x = , y = , z = (tells us)\n'

### Faulty Rods at Intervals

### Heat Map

In [208]:
hm_df = td_df.select([f"chunk_{i}" for i in range(12)])
#print(hm_df)
#print(hm_df.to_numy())

channel = ["chunk_1", "chunk_2", "chunk_3", "chunk_4", "chunk_5", "chunk_6", "chunk_7",
           "chunk_8", "chunk_9", "chunk_10", "chunk_11"]

""" time = vi_df["trigger_clk"]
data = [channel, time, hm_df]

fig = px.imshow(hm_df,
                labels=dict(x="Channels", y="Time?? (s)", color="Activtion Type"),
                x=["0/1","2/3", "4/5", "6/7", "8/9","10/11", "12/13", "14/15",
            "16/17", "18/19", "20/21", "22/23"]
               ) """

# limits to 10000 data points because won't render with all
fig = px.imshow(hm_df.to_numpy()[:10000],aspect='auto', labels=dict(x="Channels",
        y="Time (s)", title="SiPM Activation Type per Seconds", color="Activation Type"),
                x=["0/1","2/3", "4/5", "6/7", "8/9","10/11", "12/13", "14/15",
            "16/17", "18/19", "20/21", "22/23"], color_continuous_scale="twilight") #or hsv (for fun)

fig.update_xaxes(side="top")

fig.show()

ColumnNotFoundError: unable to find column "chunk_4"; valid columns: ["trigger_id", "trigger_clk", "sensor_1-2", "sensor_3-4", "internal_trigger", "veto_in", "chunk_0", "chunk_1", "chunk_2", "chunk_3"]

### Gaussian Distribution

In [None]:
## SEE EVAN'S CODE ##

### Bar Graph

In [None]:
hm_df = td_df.select([f"chunk_{i}" for i in range(12)])
#print(hm_df)

#zero = []
#for col in hm_df:
#    for row in col:
#        if row == 0:
#            zero.append(col)
#        else:
#            continue
#
#zero_value = len(zero)
##
#one = []
#for col in hm_df:
#    for row in col:
#        if row == 1:
#            one.append(col)
#        else:
#            continue
#
#one_value = len(one)
##
#two = []
#for col in hm_df:
#    for row in col:
#        if row == 2:
#            one.append(col)
#        else:
#            continue
#
#two_value = len(two)
##
#three = []
#for col in hm_df:
#    for row in col:
#        if row == 3:
#            one.append(col)
#        else:
#            continue
#
#three_value = len(three)


value_counts = hm_df.unpivot().get_column("value").value_counts().to_numpy()

#value_counts = value_counts.to_numpy()
#print(array)

t0x, t0y = np.where(value_counts==0)
t1x, t1y = np.where(value_counts==1)
t2x, t2y = np.where(value_counts==2)
t3x, t3y = np.where(value_counts==3)
#print(f'zero is {array[t0x, t0y+1]}')

#c = value_counts.transpose()
#print(14, c)
#
##print(zero_value, one_value, two_value, three_value)
#print(value_counts)
#print(0, value_counts.row(1), 1, value_counts.row(3), 2, value_counts.row(2), 3, value_counts.row(0),)
#
#zero = value_counts.filter(pl.col('value')=='0')
#print(f'zero is {zero}')


hit_events = {
    "nonevent" : 0,
    "0/0" : value_counts[t0x, t0y+1][0],
    "0/1" : value_counts[t1x, t1y+1][0],
    "1/0" : value_counts[t2x, t2y+1][0],
    "1/1" : value_counts[t3x, t3y+1][0],
}
groups = {
    "Normal"   : ["nonevent", "0/1", "1/0"],
    "Abnormal" : ["nonevent", "0/0", "1/1"],
}

width = 0.5

fig, ax = plt.subplots()


for group in groups:
    for idx, subtype in enumerate(groups[group]):
        if idx==0:
            continue

        top    = hit_events[groups[group][idx]]
        bottom = hit_events[groups[group][idx-1]]
        height = top+bottom
        #print(f'good to here', group, height, bottom, groups[group][idx], top)
        this_bar = ax.bar(x=group, height=height, bottom=bottom, label=groups[group][idx])
        ax.bar_label(this_bar, label_type='center')

#for teaching purposes (pls ignore)
""" for group in groups:
    for idx, subtype in enumerate(groups[group]):
        if idx==0:
            continue

        box2 = groups[group]

        current_value = box2[idx]
        previous_value = box2[idx-1]

        top    = hit_events[current_value]
        bottom = hit_events[previous_value]
        this_bar = ax.bar(x=group, height=top-bottom, bottom=bottom, label=current_value)
        ax.bar_label(this_bar, label_type='center') """ 

ax.legend()

ax.set_title("All Hits")
ax.set_ylabel("Hit Count")

plt.tight_layout()
plt.show()
