## Read

In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

from glob import iglob

In [2]:
def process_label(df, path):
    df['label'] = 2
    path = path.rstrip('.csv')
    path = f'{path}.txt'
    with open(path, 'r') as fr:
        lines = fr.readlines()
        for i, line in enumerate(lines[:-1]):
            line = line.rstrip('\n').split(',')
            j, l = int(line[0]), int(line[1])
            k = int(lines[i+1].split(',')[0]) - 1
            if k<j:
                print(k)
            df.loc[j:k, 'label'] = l

        line = lines[-1].rstrip('\n').split(',')
        j, l = int(line[0]), int(line[1])
        df.loc[j:, 'label'] = 1


def process_feature(df):
    df['ln_pts'] = np.log(df['pts']+1)
    df['ln_dyn'] = np.log(df['pts_dyn']+1)
    df['ln_sta'] = np.log(df['pts']-df['pts_dyn']+1)

    df['z_iqr'] = df['z_q3'] - df['z_q1']

    df['range'] = np.sqrt(np.square(df['x_c'])+np.square(df['y_c']))

    df['dx'] = df['x_c'].diff().fillna(0)
    df['dy'] = df['y_c'].diff().fillna(0)
    df['dist'] = np.sqrt(np.square(df['dx'])+np.square(df['dy']))

    df.reset_index(inplace=True)


In [3]:
dfs = []

for path in iglob(r'C:/Users/pontosense/Downloads/Target/*.csv'):
    path = path.replace('\\', '/')
    df = pd.read_csv(path, index_col=0)

    process_label(df, path)
    process_feature(df)

    n = round(df.shape[0] / 120)
    print(path.split('/')[-1], n, '{:.1%}'.format(df['label'].mean()))

    df = df[
        ['ln_pts', 'z_iqr', 'z_std', 'z_c', 'range', 'l_r', 's_r', 'dist', 'label']
    ].astype('float32').values
    for i in range(n):
        dfs.append(df)

ATR_GN_P2-240527-30019-2025-08-31-00-1384880-1388055.csv 24 98.1%
ATR_GN_P2-240527-30019-2025-09-02-00-3097855-3098844.csv 8 98.0%
ATR_GN_P2-240527-30019-2025-09-02-09-3427349-3428726.csv 7 100.0%
ATR_GN_P2-240601-00069-2025-08-27-15-181634-185130.csv 28 77.2%
ATR_GN_P2-240601-00069-2025-08-29-04-713100-716599.csv 29 98.6%
ATR_GN_P4-241029-00016-2025-09-01-07-2584784-2591384.csv 49 97.7%
ATR_GN_P4-250120-00138-2025-09-04-15-372127-373946.csv 14 95.0%
ATR_GN_P4-250120-00138-2025-09-07-15-2969150-2970732.csv 13 99.1%
ATR_GN_P4-250120-00138-2025-09-07-20-3155534-3159779.csv 20 7.1%
ATR_GN_P4-250120-00172-2025-09-01-03-7698018-7703053.csv 18 100.0%
P1-TP5-0528test-170695-173549.csv 17 99.8%
P1-TP5-0528test-199959-202507.csv 12 95.8%
P1-TP5-0528test-203583-205322.csv 13 99.4%


## Dataset & Dataloader

In [4]:
from torch.utils.data import (
    Dataset,
    DataLoader
)

In [5]:
class TrainDataset(Dataset):

    def __init__(self, dfs):
        self.dfs = dfs

    def __len__(self):
        return len(self.dfs)

    def __getitem__(self, idx):
        arr = self.dfs[idx]
        jdx = np.random.choice(arr.shape[0]-79, 1)[0]
        batch = arr[jdx:jdx+80, :-1]
        label = arr[jdx:jdx+80, -1]
        return batch, label


In [6]:
np.random.seed(2025)
torch.manual_seed(2025)

train_ds = TrainDataset(dfs)
train_dl = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=0, drop_last=True)

## Model

In [7]:
from torchinfo import summary

from layers import (
    EMA,
    Transpose
)

In [8]:
hs = 64

model = nn.Sequential(
    nn.Linear(8, hs, bias=False),
    Transpose(),
    nn.ReLU6(),
    EMA(hs, length=80),
    Transpose(),
    nn.ReLU6(),
    nn.Linear(hs, 1, bias=False)
)

summary(model, (2, 80, 8), col_names=['input_size', 'output_size', 'num_params'])

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
Sequential                               [2, 80, 8]                [2, 80, 1]                --
├─Linear: 1-1                            [2, 80, 8]                [2, 80, 64]               512
├─Transpose: 1-2                         [2, 80, 64]               [2, 64, 80]               --
├─ReLU6: 1-3                             [2, 64, 80]               [2, 64, 80]               --
├─EMA: 1-4                               [2, 64, 80]               [2, 64, 80]               128
├─Transpose: 1-5                         [2, 64, 80]               [2, 80, 64]               --
├─ReLU6: 1-6                             [2, 80, 64]               [2, 80, 64]               --
├─Linear: 1-7                            [2, 80, 64]               [2, 80, 1]                64
Total params: 704
Trainable params: 704
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.01
For

## Train

In [9]:
from torchmetrics import MetricCollection
from torchmetrics.classification import (
    BinaryAccuracy,
    BinaryAUROC
)

In [10]:
# criterion
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=.1**.5)

metric = MetricCollection({'acc':BinaryAccuracy(), 'auc':BinaryAUROC()})

In [11]:
epochs = 300
step = 0

model.train()

for epoch in range(epochs):

    for batch, label in train_dl:
        logit = model(batch).squeeze(-1)
        loss = loss_fn(logit, label)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        proba = torch.sigmoid(logit)
        metric.update(proba, label)
        step += 1
    scheduler.step()

    if (epoch % 10 == 1) or (epoch==epochs-1):
        res = metric.compute()
        print(f'epoch {epoch+1:>2d} - acc: {res["acc"]:.4f} - auc: {res["auc"]:.4f}')
        metric.reset()
        torch.save(model.state_dict(), f'./ckpt/epoch{epoch}-{res["auc"]:.4f}.pth')


epoch  2 - acc: 0.8711 - auc: 0.6147
epoch 12 - acc: 0.8862 - auc: 0.8282
epoch 22 - acc: 0.8822 - auc: 0.8867
epoch 32 - acc: 0.8844 - auc: 0.9139
epoch 42 - acc: 0.8877 - auc: 0.9166
epoch 52 - acc: 0.8987 - auc: 0.9286
epoch 62 - acc: 0.9192 - auc: 0.9353
epoch 72 - acc: 0.9207 - auc: 0.9365
epoch 82 - acc: 0.9321 - auc: 0.9369
epoch 92 - acc: 0.9361 - auc: 0.9420
epoch 102 - acc: 0.9406 - auc: 0.9466
epoch 112 - acc: 0.9409 - auc: 0.9463
epoch 122 - acc: 0.9405 - auc: 0.9453
epoch 132 - acc: 0.9378 - auc: 0.9338
epoch 142 - acc: 0.9384 - auc: 0.9434
epoch 152 - acc: 0.9394 - auc: 0.9473
epoch 162 - acc: 0.9405 - auc: 0.9454
epoch 172 - acc: 0.9389 - auc: 0.9419
epoch 182 - acc: 0.9344 - auc: 0.9404
epoch 192 - acc: 0.9448 - auc: 0.9513
epoch 202 - acc: 0.9438 - auc: 0.9419
epoch 212 - acc: 0.9393 - auc: 0.9475
epoch 222 - acc: 0.9492 - auc: 0.9501
epoch 232 - acc: 0.9395 - auc: 0.9394
epoch 242 - acc: 0.9396 - auc: 0.9447
epoch 252 - acc: 0.9381 - auc: 0.9433
epoch 262 - acc: 0.940

In [12]:
model

Sequential(
  (0): Linear(in_features=8, out_features=64, bias=False)
  (1): Transpose()
  (2): ReLU6()
  (3): EMA(in_chn=64, length=80)
  (4): Transpose()
  (5): ReLU6()
  (6): Linear(in_features=64, out_features=1, bias=False)
)

In [13]:
model[3].alpha.data.flatten()

tensor([0.0552, 0.4516, 0.4204, 0.0735, 0.0100, 0.2558, 0.1382, 0.0104, 0.8001,
        0.1398, 0.9042, 0.7046, 0.0100, 0.0909, 0.9341, 0.3078, 0.0100, 0.2093,
        0.1412, 0.5658, 0.0136, 0.1505, 0.2014, 0.6369, 0.1727, 0.7603, 0.0100,
        0.1473, 0.8202, 0.4745, 0.2965, 0.6796, 0.3235, 0.9530, 0.1374, 0.7407,
        0.0100, 0.0491, 0.5837, 0.6580, 0.3492, 0.1696, 0.0100, 0.1417, 0.0100,
        0.0100, 0.2849, 0.8315, 0.3609, 0.1565, 0.7086, 0.7284, 0.9117, 0.0138,
        0.8856, 0.5010, 0.2206, 0.3838, 0.3552, 0.5857, 0.8184, 0.3391, 0.0100,
        0.7613])

## Test

In [14]:
dfs = []

for path in iglob(r'C:/Users/pontosense/Downloads/Target/test/*.csv'):
    path = path.replace('\\', '/')
    df = pd.read_csv(path, index_col=0)

    process_label(df, path)
    process_feature(df)

    n = df.shape[0] // 80
    print(path.split('/')[-1], n, '{:.1%}'.format(df['label'].mean()))

    df = df[
        ['ln_pts', 'z_iqr', 'z_std', 'z_c', 'range', 'l_r', 's_r', 'dist', 'label']
    ].astype('float32').values
    for i in range(n):
        dfs.append(df[i*80:(i+1)*80])

ATR_GN_P2-240527-30019-2025-09-02-01-3123914-3124287.csv 4 100.0%
ATR_GN_P2-240527-30019-2025-09-03-11-4351701-4352882.csv 4 31.6%
P1-TP5-0528test-185780-186966.csv 11 100.0%


In [15]:
len(dfs)

19

In [16]:
class TestDataset(Dataset):

    def __init__(self, dfs):
        self.dfs = dfs

    def __len__(self):
        return len(self.dfs)

    def __getitem__(self, idx):
        arr = self.dfs[idx]
        batch = arr[:, :-1]
        label = arr[:, -1]
        return batch, label

In [17]:
test_ds = TestDataset(dfs)
test_dl = DataLoader(test_ds, batch_size=4, shuffle=False, num_workers=0, drop_last=False)

In [18]:
model.eval()

for path in iglob('ckpt/*.pth'):
    ckpt = torch.load(path)
    model.load_state_dict(ckpt)
    s = path.split('\\')[-1]

    for batch, label in test_dl:
        with torch.no_grad():
            logit = model(batch).squeeze(-1)
            proba = torch.sigmoid(logit)

        metric.update(proba, label)

    res = metric.compute()
    print(f'{s}  - acc: {res["acc"]:.4f} - auc: {res["auc"]:.4f}')
    metric.reset()

epoch1-0.6147.pth  - acc: 0.8401 - auc: 0.8743
epoch101-0.9466.pth  - acc: 0.9289 - auc: 0.8963
epoch11-0.8282.pth  - acc: 0.8401 - auc: 0.9040
epoch111-0.9463.pth  - acc: 0.9289 - auc: 0.8964
epoch121-0.9453.pth  - acc: 0.9276 - auc: 0.8954
epoch131-0.9338.pth  - acc: 0.9283 - auc: 0.8956
epoch141-0.9434.pth  - acc: 0.9316 - auc: 0.8949
epoch151-0.9473.pth  - acc: 0.9316 - auc: 0.8948
epoch161-0.9454.pth  - acc: 0.9309 - auc: 0.8946
epoch171-0.9419.pth  - acc: 0.9303 - auc: 0.8946
epoch181-0.9404.pth  - acc: 0.9296 - auc: 0.8946
epoch191-0.9513.pth  - acc: 0.9303 - auc: 0.8946
epoch201-0.9419.pth  - acc: 0.9303 - auc: 0.8947
epoch21-0.8867.pth  - acc: 0.8401 - auc: 0.9330
epoch211-0.9475.pth  - acc: 0.9303 - auc: 0.8946
epoch221-0.9501.pth  - acc: 0.9303 - auc: 0.8948
epoch231-0.9394.pth  - acc: 0.9296 - auc: 0.8948
epoch241-0.9447.pth  - acc: 0.9303 - auc: 0.8948
epoch251-0.9433.pth  - acc: 0.9296 - auc: 0.8948
epoch261-0.9521.pth  - acc: 0.9303 - auc: 0.8947
epoch271-0.9426.pth  - a

In [None]:
model.eval()

path = 'ckpt/epoch21-0.8867.pth'
ckpt = torch.load(path)
s = path.split('//')[-1]
model.load_state_dict(ckpt)

probas = []
labels = []
for batch, label in test_dl:
    with torch.no_grad():
        logit = model(batch).squeeze(-1)
        proba = torch.sigmoid(logit)

    labels.append(label.flatten().numpy())
    probas.append(proba.flatten().numpy())
    metric.update(proba, label)
    break

# res = metric.compute()
# print(f'{s}  - acc: {res["acc"]:.4f} - auc: {res["auc"]:.4f}')
# metric.reset()

In [45]:
batch[0][1].numpy()

array([3.9512436 , 0.4121284 , 0.57202524, 0.9942352 , 0.38036877,
       0.11640994, 0.07208236, 0.08725858], dtype=float32)

In [46]:
logit[0]

tensor([2.3988, 2.5763, 3.1042, 3.0979, 2.9816, 2.9710, 3.0804, 3.0413, 3.1472,
        3.0080, 2.8196, 2.8772, 2.8212, 2.7985, 2.7628, 2.7143, 2.5950, 2.8100,
        2.7257, 2.5937, 2.5216, 2.8997, 3.0910, 2.8126, 3.1184, 2.7823, 2.6153,
        2.6994, 2.6169, 2.7610, 2.7248, 2.7025, 2.6632, 2.5674, 2.6755, 2.9374,
        2.7991, 2.8609, 2.8261, 2.9362, 2.8920, 2.8185, 2.7370, 2.7926, 2.7865,
        3.2422, 3.1860, 2.7271, 2.9050, 2.5613, 2.6024, 2.6309, 2.5244, 2.4070,
        2.4062, 2.5203, 2.3529, 2.5255, 2.4612, 2.8329, 2.8527, 2.4304, 2.8247,
        2.8194, 2.7688, 2.4443, 2.2603, 2.4704, 2.4080, 2.4878, 2.4988, 2.5937,
        2.4271, 2.5507, 2.5429, 2.4925, 2.4451, 2.3706, 2.1252, 2.2868])

In [35]:
probas[0][0]

np.float32(0.91673785)

In [21]:
probas = np.concat(probas)
y_true = np.concat(labels)

In [28]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_true, probas>0.8)

array([[ 186,   57],
       [  74, 1203]])

In [None]:
[(0, 2.5), (2.5, 3), (5.5, 2)]

In [37]:
arr = np.asarray([1, 1, 1, 0, 0, 0, 1, 1])

def pre_broken_barh(arr):
    diff = np.where(np.diff(arr)!=0)[0]
    colors = []
    xranges = []

    for i, j in enumerate(np.r_[-1, diff]):
        colors.append(f'C{arr[j+1]}')
        if i == 0:
            xranges.append((0, float(diff[0])+0.5))
            continue
        elif i < len(diff):
            i = float(diff[i])
        else:
            i = float(len(arr)-1)

        j = float(j)
        xranges.append((j+.5, i-j))
    return xranges, colors


In [38]:
df = pd.read_csv('C:/Users/pontosense/Downloads/Target/test2/ATR_GN_P2-240527-30019-2025-09-02-01-3123914-3124977_res.csv').loc[:320-1]
df = pd.concat(
    [df,
    pd.read_csv('C:/Users/pontosense/Downloads/Target/test2/ATR_GN_P2-240527-30019-09-03-11-4351701-4353067_res.csv').loc[:320-1]],
    ignore_index=True
)
df = pd.concat(
    [df,
    pd.read_csv('C:/Users/pontosense/Downloads/Target/test2/P1-TP5-0528test-185780-186966_res.csv').loc[:880-1]],
    ignore_index=True
)


In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 3))

xranges, colors = pre_broken_barh(df['pred'].astype(int))
ax.broken_barh(xranges,  (.2, .6), facecolors=colors)
xranges, colors = pre_broken_barh((probas>0.8).astype(int))
ax.broken_barh(xranges,  (1.2, .6), facecolors=colors)
xranges, colors = pre_broken_barh(y_true.astype(int))
ax.broken_barh(xranges,  (2.2, .6), facecolors=colors)

# 设置坐标范围与样式
ax.set_xlim(0, len(y_true))
ax.set_ylim(-0.5, 3.5)
ax.set_yticks([0.5, 1.5, 2.5], ['RF-PRED', 'RNN-PRED', 'GT'])
# ax.set_xlabel("Time")

plt.tight_layout()
plt.show()