In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from mh_file_parser import parse
import polars as pl
import plotly.express as px

In [3]:
# Parse the sample .ptu capture. The context manager closes the file handle
# once parsing returns instead of leaving it open for the kernel's lifetime.
# NOTE(review): assumes parse() consumes the stream eagerly - confirm that
# `result` never reads from the file after this block.
with open("./sampledata/default_pulse.ptu", "rb") as inputfile:
    result = parse(inputfile)

Writing 27626527 records, this may take a while...
{'globRes': 5e-12, 'numRecords': 27626527}
Progress: 99.9%

In [4]:
# List "channel: number of events" for every channel that recorded at least one event
[
    f"{channel}: {len(events)}"
    for channel, events in enumerate(result.events)
    if len(events) > 0
]

['0: 14614015', '1: 4528237', '2: 8484275']

In [6]:
# Combine the per-channel events of the parsed file into one table ordered by
# timestamp (nothing in this cell is randomly generated).
import random  # NOTE(review): not used in this cell - confirm before removing

# Concatenation of the channel 0-2 events, sorted in place.
# NOTE(review): `data` is not referenced by the cells shown here - confirm it is still needed.
data = result.events[0] + result.events[1] + result.events[2]
data.sort()

# One frame per channel, each row tagged with its channel number, merged and sorted.
df = pl.concat(
    [pl.DataFrame({"timestamp": result.events[ch], "ch": ch}) for ch in (0, 1, 2)]
).sort("timestamp")

In [7]:
# Inspect the combined, timestamp-sorted event table
df

timestamp,ch
f64,i64
65169.6,1
301060.6,2
321908.0,0
464822.4,2
541673.8,0
1726647.6,0
1.7999e6,0
1808993.6,2
1.8621e6,0
3065882.2,2


In [8]:
def calculate_time_diff(df, channel_from, channel_to, max_time_diff=2000):
    """Measure the gap from each `channel_from` event to the event right after it.

    Only rows of the two requested channels are considered. After sorting by
    timestamp, a pair is kept when a `channel_from` row is immediately followed
    by a `channel_to` row and the gap lies strictly between 0 and
    `max_time_diff`.

    Args:
        df: polars DataFrame with a "timestamp" column and a "ch" column.
        channel_from: channel value that starts a pair.
        channel_to: channel value that ends a pair.
        max_time_diff: exclusive upper bound on the accepted gap, expressed in
            the same units as "timestamp". Defaults to 2000, the value that
            was previously hard-coded.

    Returns:
        polars DataFrame with the columns "ch", "next_channel" and
        "time_diff", one row per accepted pair.
    """
    # Keep only the events of the two channels of interest.
    filtered_df = df.filter(pl.col("ch").is_in([channel_from, channel_to]))

    # Compute the time differences between consecutive events.
    time_diffs = (
        # Sort here rather than trusting the caller to pass an ordered frame.
        filtered_df.sort("timestamp")
        # Attach the following row's channel and timestamp to every row.
        .with_columns(
            [
                pl.col("ch").shift(-1).alias("next_channel"),
                pl.col("timestamp").shift(-1).alias("next_timestamp"),
            ]
        )
        # Keep only direct channel_from -> channel_to successions.
        .filter((pl.col("ch") == channel_from) & (pl.col("next_channel") == channel_to))
        .with_columns(
            [(pl.col("next_timestamp") - pl.col("timestamp")).alias("time_diff")]
        )
        # Both bounds are exclusive: zero-length gaps are discarded as well.
        .filter((pl.col("time_diff") > 0) & (pl.col("time_diff") < max_time_diff))
        .select(["ch", "next_channel", "time_diff"])
    )

    return time_diffs


# Time differences measured from channel 0 to channel 1 and to channel 2
diff01_df = calculate_time_diff(df, channel_from=0, channel_to=1)
diff02_df = calculate_time_diff(df, channel_from=0, channel_to=2)

In [9]:
# Inspect the channel 0 -> channel 1 time differences
diff01_df

ch,next_channel,time_diff
i64,i64,f64
0,1,59.6
0,1,211.8
0,1,778.0
0,1,144.2
0,1,780.0
0,1,141.2
0,1,1742.4
0,1,292.2
0,1,1811.0
0,1,1821.6


In [11]:
# Histogram of the raw channel 0 -> channel 1 time differences.
# The title tells this figure apart from the channel 0 -> 2 one.
fig = px.histogram(
    diff01_df.to_pandas(),
    x="time_diff",
    nbins=1000,
    title="Time difference: channel 0 -> channel 1",
)
fig.update_layout(bargap=0.2)

In [12]:
# Histogram of the raw channel 0 -> channel 2 time differences.
# The title tells this figure apart from the channel 0 -> 1 one.
fig = px.histogram(
    diff02_df.to_pandas(),
    x="time_diff",
    nbins=10000,
    title="Time difference: channel 0 -> channel 2",
)
fig.update_layout(bargap=0.2)