In [41]:
import polars as pl
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import matplotlib.pyplot as plt
from src.settings import TARGET_COLS

In [3]:
# Read the training metadata table and preview its first five rows.
df = pl.read_csv(source="./data/train.csv")
df.head(5)

eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
i64,i64,f64,i64,i64,f64,i64,i64,str,i64,i64,i64,i64,i64,i64
1628180742,0,0.0,353733,0,0.0,127492639,42516,"""Seizure""",3,0,0,0,0,0
1628180742,1,6.0,353733,1,6.0,3887563113,42516,"""Seizure""",3,0,0,0,0,0
1628180742,2,8.0,353733,2,8.0,1142670488,42516,"""Seizure""",3,0,0,0,0,0
1628180742,3,18.0,353733,3,18.0,2718991173,42516,"""Seizure""",3,0,0,0,0,0
1628180742,4,24.0,353733,4,24.0,3080632009,42516,"""Seizure""",3,0,0,0,0,0


In [42]:
def plot_eeg(df, idx=None):
    """Plot the EEG window that belongs to one row of the training metadata.

    Prints the row's ``TARGET_COLS`` values, loads the parquet file named after
    the row's ``eeg_id``, cuts the window starting at the row's
    ``eeg_label_offset_seconds`` and draws every column of that window as its
    own subplot row in a single Plotly figure.

    Args:
        df: polars DataFrame providing the ``eeg_id``,
            ``eeg_label_offset_seconds`` and ``expert_consensus`` columns plus
            the columns listed in ``TARGET_COLS``.
        idx: Row position in ``df`` to plot. When ``None`` a random row is
            drawn with ``np.random.randint``.

    Returns:
        None. The figure is rendered through ``fig.show()``.
    """
    # The offset column is in seconds; 200 converts it to a sample position.
    samples_per_second = 200
    # 10000 samples, i.e. 50 seconds at the rate above.
    window_samples = 10000
    # Only every 10th sample is drawn to keep the figure light.
    plot_step = 10

    if idx is None:
        idx = np.random.randint(0, df.shape[0])
    print(df[idx][TARGET_COLS])
    eeg_id = df["eeg_id"][idx]
    offset = df["eeg_label_offset_seconds"][idx]
    expert_consensus = df["expert_consensus"][idx]
    eeg_df = pl.read_parquet(f"./data/train_eegs/{eeg_id}.parquet")
    eeg_df = eeg_df.to_pandas()

    # The offset is read from a float column, and positional slicing with
    # .iloc only accepts integer bounds, so convert before slicing.
    start = int(round(offset * samples_per_second))
    eeg_df = eeg_df.iloc[start : start + window_samples]

    # One subplot row per column, so the grid always matches the traces added.
    n_rows = len(eeg_df.columns)
    fig = make_subplots(
        rows=n_rows,
        cols=1,
        shared_xaxes=True,
        row_heights=[20] * n_rows,
        vertical_spacing=0.01,
    )
    for i, col in enumerate(eeg_df.columns):
        fig.add_trace(
            go.Scatter(y=eeg_df[col].values[::plot_step], name=col),
            row=i + 1,
            col=1,
        )
    fig.update_layout(
        autosize=False,
        width=800,
        height=1600,
        title=f"eeg_id={eeg_id} | expert_consensus={expert_consensus}",
    )
    fig.show()

In [43]:
# No row position is given, so plot_eeg picks a random row of df.
plot_eeg(df, idx=None)

shape: (1, 6)
┌──────────────┬──────────┬──────────┬───────────┬───────────┬────────────┐
│ seizure_vote ┆ lpd_vote ┆ gpd_vote ┆ lrda_vote ┆ grda_vote ┆ other_vote │
│ ---          ┆ ---      ┆ ---      ┆ ---       ┆ ---       ┆ ---        │
│ i64          ┆ i64      ┆ i64      ┆ i64       ┆ i64       ┆ i64        │
╞══════════════╪══════════╪══════════╪═══════════╪═══════════╪════════════╡
│ 0            ┆ 0        ┆ 2        ┆ 0         ┆ 0         ┆ 2          │
└──────────────┴──────────┴──────────┴───────────┴───────────┴────────────┘


In [5]:
# (rows, columns) of the training metadata frame.
(df.height, df.width)

(106800, 15)

In [6]:
# Number of distinct spectrogram_id values in the training metadata.
df.get_column("spectrogram_id").n_unique()

11138

In [7]:
# Distinct counts for patient_id and label_id, in that order.
tuple(df.get_column(name).n_unique() for name in ("patient_id", "label_id"))

(1950, 106800)

In [8]:
# Read the test metadata table and preview its first five rows.
test = pl.read_csv(source="../data/test.csv")
test.head(5)

spectrogram_id,eeg_id,patient_id
i64,i64,i64
853520,3911565283,6885


In [9]:
# Distinct counts for spectrogram_id and eeg_id in the test metadata.
tuple(test.get_column(name).n_unique() for name in ("spectrogram_id", "eeg_id"))

(1, 1)

In [10]:
# Load the spectrogram parquet referenced by the first row of the training
# metadata. (matplotlib.pyplot is already imported in the notebook's first
# cell and is not used here, so it is not re-imported.)
spec_df = pl.read_parquet(
    f"../data/train_spectrograms/{df['spectrogram_id'][0]}.parquet"
)

In [11]:
# Display the spectrogram frame loaded from train_spectrograms.
spec_df

time,LL_0.59,LL_0.78,LL_0.98,LL_1.17,LL_1.37,LL_1.56,LL_1.76,LL_1.95,LL_2.15,LL_2.34,LL_2.54,LL_2.73,LL_2.93,LL_3.13,LL_3.32,LL_3.52,LL_3.71,LL_3.91,LL_4.1,LL_4.3,LL_4.49,LL_4.69,LL_4.88,LL_5.08,LL_5.27,LL_5.47,LL_5.66,LL_5.86,LL_6.05,LL_6.25,LL_6.45,LL_6.64,LL_6.84,LL_7.03,LL_7.23,LL_7.42,…,RP_12.89,RP_13.09,RP_13.28,RP_13.48,RP_13.67,RP_13.87,RP_14.06,RP_14.26,RP_14.45,RP_14.65,RP_14.84,RP_15.04,RP_15.23,RP_15.43,RP_15.63,RP_15.82,RP_16.02,RP_16.21,RP_16.41,RP_16.6,RP_16.8,RP_16.99,RP_17.19,RP_17.38,RP_17.58,RP_17.77,RP_17.97,RP_18.16,RP_18.36,RP_18.55,RP_18.75,RP_18.95,RP_19.14,RP_19.34,RP_19.53,RP_19.73,RP_19.92
i64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
1,4.26,10.98,9.05,13.65,11.49,8.93,18.84,19.26,19.24,19.049999,8.82,5.4,6.47,6.08,3.94,7.67,4.23,5.86,5.23,3.69,2.46,2.32,2.57,3.32,4.01,2.46,3.75,3.61,3.38,2.97,2.61,2.0,1.53,1.7,1.81,1.19,…,0.59,0.53,0.42,0.37,0.18,0.18,0.19,0.1,0.11,0.14,0.19,0.17,0.16,0.17,0.06,0.08,0.42,0.6,0.95,1.07,1.09,1.13,0.46,0.54,0.43,0.32,0.39,0.31,0.17,0.28,0.19,0.24,0.27,0.29,0.16,0.22,0.19
3,2.65,3.97,12.18,13.26,14.21,13.23,9.65,8.11,11.28,8.46,5.48,4.17,5.55,3.96,4.71,5.09,3.99,3.6,3.7,1.9,1.88,2.17,1.91,2.5,3.56,3.1,3.44,4.53,4.17,3.02,3.11,2.22,1.83,2.01,1.39,1.04,…,0.68,0.67,0.65,0.29,0.33,0.14,0.12,0.15,0.12,0.1,0.11,0.15,0.17,0.25,0.31,0.61,0.86,1.03,1.28,1.11,0.87,0.66,0.59,0.32,0.27,0.22,0.18,0.15,0.13,0.14,0.24,0.24,0.36,0.35,0.31,0.36,0.4
5,4.18,4.53,8.77,14.26,13.36,16.559999,19.219999,17.51,22.65,21.719999,17.75,13.57,5.59,4.79,3.26,2.91,2.93,2.68,4.23,5.22,6.22,6.21,5.79,6.1,3.24,3.69,3.52,1.91,2.94,2.58,2.09,1.65,0.58,0.79,0.74,0.68,…,0.3,0.36,0.38,0.28,0.23,0.18,0.26,0.21,0.18,0.21,0.24,0.35,0.37,0.27,0.75,0.74,1.53,1.51,0.99,1.02,0.53,0.29,0.2,0.2,0.26,0.25,0.28,0.29,0.21,0.16,0.25,0.28,0.28,0.34,0.48,0.44,0.48
7,2.41,3.21,4.92,8.07,5.97,12.42,10.82,14.96,21.809999,19.629999,17.43,13.14,7.44,5.39,3.93,4.47,3.41,2.4,7.16,5.56,7.59,9.23,5.28,5.09,7.6,5.5,4.55,5.32,3.71,3.35,2.96,3.1,1.72,1.76,1.16,1.15,…,0.34,0.32,0.37,0.37,0.37,0.3,0.27,0.2,0.28,0.44,0.39,0.39,0.55,0.46,0.39,0.92,0.9,0.92,1.0,0.88,0.71,0.65,0.61,0.63,0.44,0.42,0.41,0.33,0.51,0.49,0.64,0.58,0.42,0.32,0.31,0.32,0.33
9,2.29,2.44,2.77,4.62,5.39,7.08,9.84,12.27,14.41,13.31,11.46,12.32,6.97,8.5,7.07,3.98,3.54,2.72,3.57,5.45,5.19,6.39,8.67,7.47,7.77,6.3,5.38,3.99,3.93,3.55,3.87,3.56,2.16,1.29,1.1,1.55,…,0.24,0.31,0.36,0.41,0.39,0.31,0.24,0.19,0.15,0.16,0.45,0.39,0.47,0.63,0.39,0.23,0.52,0.79,1.12,1.12,1.13,0.98,0.38,0.74,0.53,0.55,0.59,0.44,0.38,0.48,0.63,0.45,0.45,0.49,0.33,0.31,0.34
11,2.77,3.5,5.17,4.29,4.45,6.99,8.96,7.87,18.200001,15.8,13.27,13.36,4.87,4.48,5.55,4.8,4.58,4.4,4.4,4.38,4.86,5.74,5.31,5.0,4.94,2.78,2.34,2.33,3.81,3.47,4.39,3.61,1.9,2.06,1.99,1.68,…,0.14,0.15,0.17,0.14,0.2,0.3,0.29,0.24,0.18,0.16,0.18,0.37,0.44,0.46,0.48,0.59,0.46,0.76,0.72,1.06,0.95,0.66,0.7,0.35,0.4,0.38,0.33,0.36,0.33,0.48,0.23,0.24,0.22,0.22,0.18,0.16,0.27
13,2.87,3.45,3.1,3.94,8.17,11.74,14.71,18.59,24.66,17.75,18.379999,14.6,3.3,2.75,3.58,4.27,3.8,3.47,3.57,4.69,5.66,6.36,5.94,4.98,2.83,3.23,3.12,4.03,4.57,4.63,3.52,2.17,2.9,2.44,2.18,2.05,…,0.2,0.16,0.16,0.13,0.25,0.35,0.33,0.31,0.27,0.18,0.13,0.25,0.29,0.26,0.23,0.3,0.37,0.47,0.73,0.83,0.9,1.01,0.34,0.68,0.32,0.34,0.42,0.39,0.4,0.38,0.52,0.46,0.33,0.35,0.18,0.23,0.23
15,3.86,4.07,4.26,6.46,8.06,11.39,9.6,16.530001,23.459999,24.33,21.440001,20.34,7.32,5.38,2.9,3.16,2.67,2.31,2.8,3.87,3.73,4.49,4.19,3.76,2.85,2.79,3.03,3.29,3.74,4.07,2.67,1.93,0.75,1.42,1.56,1.59,…,0.2,0.19,0.18,0.14,0.24,0.19,0.17,0.2,0.12,0.11,0.13,0.18,0.21,0.39,0.51,0.45,0.48,0.58,0.52,0.88,1.02,1.0,0.9,0.65,0.36,0.29,0.32,0.36,0.39,0.5,0.53,0.43,0.48,0.33,0.24,0.18,0.2
17,9.33,11.45,12.28,11.26,7.21,3.62,8.97,12.92,17.389999,19.360001,18.559999,11.39,8.87,5.94,4.86,1.8,1.45,1.84,2.02,3.5,4.11,4.34,5.18,4.54,3.22,3.22,1.93,1.89,2.12,1.66,1.44,1.16,0.94,1.33,1.58,1.49,…,0.17,0.19,0.1,0.12,0.12,0.11,0.14,0.13,0.19,0.2,0.22,0.28,0.31,0.51,0.44,0.47,0.43,0.31,0.35,0.29,0.64,0.71,1.03,1.06,0.65,0.74,0.25,0.35,0.38,0.32,0.31,0.43,0.22,0.26,0.3,0.28,0.25
19,7.39,11.34,12.52,12.49,14.19,11.72,15.29,19.51,24.83,25.17,18.34,17.73,6.57,4.18,3.46,3.96,3.51,3.44,4.15,3.72,4.28,5.33,7.43,7.52,7.34,6.83,4.0,3.09,2.05,1.99,1.64,1.81,1.51,1.51,1.52,1.23,…,0.12,0.1,0.13,0.14,0.15,0.18,0.2,0.17,0.16,0.16,0.2,0.2,0.22,0.29,0.21,0.23,0.15,0.15,0.19,0.2,0.32,1.19,1.01,1.13,1.13,0.47,0.34,0.26,0.38,0.32,0.29,0.26,0.18,0.18,0.26,0.31,0.36


In [12]:
# Preview the first five rows of the spectrogram frame.
spec_df.head(5)

time,LL_0.59,LL_0.78,LL_0.98,LL_1.17,LL_1.37,LL_1.56,LL_1.76,LL_1.95,LL_2.15,LL_2.34,LL_2.54,LL_2.73,LL_2.93,LL_3.13,LL_3.32,LL_3.52,LL_3.71,LL_3.91,LL_4.1,LL_4.3,LL_4.49,LL_4.69,LL_4.88,LL_5.08,LL_5.27,LL_5.47,LL_5.66,LL_5.86,LL_6.05,LL_6.25,LL_6.45,LL_6.64,LL_6.84,LL_7.03,LL_7.23,LL_7.42,…,RP_12.89,RP_13.09,RP_13.28,RP_13.48,RP_13.67,RP_13.87,RP_14.06,RP_14.26,RP_14.45,RP_14.65,RP_14.84,RP_15.04,RP_15.23,RP_15.43,RP_15.63,RP_15.82,RP_16.02,RP_16.21,RP_16.41,RP_16.6,RP_16.8,RP_16.99,RP_17.19,RP_17.38,RP_17.58,RP_17.77,RP_17.97,RP_18.16,RP_18.36,RP_18.55,RP_18.75,RP_18.95,RP_19.14,RP_19.34,RP_19.53,RP_19.73,RP_19.92
i64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
1,4.26,10.98,9.05,13.65,11.49,8.93,18.84,19.26,19.24,19.049999,8.82,5.4,6.47,6.08,3.94,7.67,4.23,5.86,5.23,3.69,2.46,2.32,2.57,3.32,4.01,2.46,3.75,3.61,3.38,2.97,2.61,2.0,1.53,1.7,1.81,1.19,…,0.59,0.53,0.42,0.37,0.18,0.18,0.19,0.1,0.11,0.14,0.19,0.17,0.16,0.17,0.06,0.08,0.42,0.6,0.95,1.07,1.09,1.13,0.46,0.54,0.43,0.32,0.39,0.31,0.17,0.28,0.19,0.24,0.27,0.29,0.16,0.22,0.19
3,2.65,3.97,12.18,13.26,14.21,13.23,9.65,8.11,11.28,8.46,5.48,4.17,5.55,3.96,4.71,5.09,3.99,3.6,3.7,1.9,1.88,2.17,1.91,2.5,3.56,3.1,3.44,4.53,4.17,3.02,3.11,2.22,1.83,2.01,1.39,1.04,…,0.68,0.67,0.65,0.29,0.33,0.14,0.12,0.15,0.12,0.1,0.11,0.15,0.17,0.25,0.31,0.61,0.86,1.03,1.28,1.11,0.87,0.66,0.59,0.32,0.27,0.22,0.18,0.15,0.13,0.14,0.24,0.24,0.36,0.35,0.31,0.36,0.4
5,4.18,4.53,8.77,14.26,13.36,16.559999,19.219999,17.51,22.65,21.719999,17.75,13.57,5.59,4.79,3.26,2.91,2.93,2.68,4.23,5.22,6.22,6.21,5.79,6.1,3.24,3.69,3.52,1.91,2.94,2.58,2.09,1.65,0.58,0.79,0.74,0.68,…,0.3,0.36,0.38,0.28,0.23,0.18,0.26,0.21,0.18,0.21,0.24,0.35,0.37,0.27,0.75,0.74,1.53,1.51,0.99,1.02,0.53,0.29,0.2,0.2,0.26,0.25,0.28,0.29,0.21,0.16,0.25,0.28,0.28,0.34,0.48,0.44,0.48
7,2.41,3.21,4.92,8.07,5.97,12.42,10.82,14.96,21.809999,19.629999,17.43,13.14,7.44,5.39,3.93,4.47,3.41,2.4,7.16,5.56,7.59,9.23,5.28,5.09,7.6,5.5,4.55,5.32,3.71,3.35,2.96,3.1,1.72,1.76,1.16,1.15,…,0.34,0.32,0.37,0.37,0.37,0.3,0.27,0.2,0.28,0.44,0.39,0.39,0.55,0.46,0.39,0.92,0.9,0.92,1.0,0.88,0.71,0.65,0.61,0.63,0.44,0.42,0.41,0.33,0.51,0.49,0.64,0.58,0.42,0.32,0.31,0.32,0.33
9,2.29,2.44,2.77,4.62,5.39,7.08,9.84,12.27,14.41,13.31,11.46,12.32,6.97,8.5,7.07,3.98,3.54,2.72,3.57,5.45,5.19,6.39,8.67,7.47,7.77,6.3,5.38,3.99,3.93,3.55,3.87,3.56,2.16,1.29,1.1,1.55,…,0.24,0.31,0.36,0.41,0.39,0.31,0.24,0.19,0.15,0.16,0.45,0.39,0.47,0.63,0.39,0.23,0.52,0.79,1.12,1.12,1.13,0.98,0.38,0.74,0.53,0.55,0.59,0.44,0.38,0.48,0.63,0.45,0.45,0.49,0.33,0.31,0.34


In [13]:
# Load the raw EEG parquet referenced by the first row of the training metadata.
eeg_df = pl.read_parquet(
    source=f"../data/train_eegs/{df['eeg_id'][0]}.parquet"
)

In [14]:
# Display the EEG frame loaded from train_eegs.
eeg_df

Fp1,F3,C3,P3,F7,T3,T5,O1,Fz,Cz,Pz,Fp2,F4,C4,P4,F8,T4,T6,O2,EKG
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
-80.519997,-70.540001,-80.110001,-108.75,-120.330002,-88.620003,-101.75,-104.489998,-99.129997,-90.389999,-97.040001,-77.989998,-88.830002,-112.120003,-108.110001,-95.949997,-98.360001,-121.730003,-106.449997,7.92
-80.449997,-70.330002,-81.760002,-107.669998,-120.769997,-90.82,-104.260002,-99.730003,-99.07,-92.290001,-96.019997,-84.5,-84.989998,-115.610001,-103.860001,-97.470001,-89.290001,-115.5,-102.059998,29.219999
-80.209999,-75.870003,-82.050003,-106.010002,-117.5,-87.489998,-99.589996,-96.82,-119.68,-99.360001,-91.110001,-99.440002,-104.589996,-127.529999,-113.349998,-95.870003,-96.019997,-123.879997,-105.790001,45.740002
-84.709999,-75.339996,-87.480003,-108.970001,-121.410004,-94.75,-105.370003,-100.279999,-113.839996,-102.059998,-95.040001,-99.230003,-101.220001,-125.769997,-111.889999,-97.459999,-97.18,-128.940002,-109.889999,83.870003
-90.57,-80.790001,-93.0,-113.870003,-129.960007,-102.860001,-118.599998,-101.099998,-107.660004,-102.339996,-98.510002,-95.300003,-88.93,-115.639999,-99.800003,-97.5,-88.730003,-114.849998,-100.25,97.769997
-96.739998,-91.18,-94.940002,-111.849998,-129.570007,-100.150002,-107.949997,-97.839996,-129.490005,-110.010002,-93.900002,-117.620003,-107.790001,-125.959999,-106.029999,-99.809998,-93.800003,-120.349998,-100.769997,91.269997
-98.529999,-99.879997,-102.720001,-114.370003,-134.169998,-103.389999,-105.739998,-102.660004,-136.169998,-116.75,-96.459999,-114.419998,-116.620003,-139.149994,-111.260002,-101.949997,-101.209999,-128.0,-106.389999,92.809998
-106.18,-110.68,-117.760002,-122.389999,-145.399994,-121.790001,-118.629997,-107.160004,-132.5,-119.589996,-105.309998,-103.589996,-107.32,-134.509995,-105.269997,-108.620003,-90.900002,-121.139999,-103.089996,104.709999
-114.260002,-123.260002,-126.019997,-123.110001,-144.210007,-123.279999,-116.760002,-102.940002,-151.960007,-125.669998,-102.370003,-121.720001,-122.199997,-133.080002,-110.68,-117.370003,-104.489998,-125.43,-102.739998,105.07
-121.730003,-127.860001,-123.330002,-120.5,-150.509995,-114.580002,-111.290001,-102.050003,-163.320007,-130.449997,-100.610001,-134.080002,-135.539993,-150.580002,-122.489998,-117.910004,-113.110001,-137.690002,-108.540001,94.379997


In [17]:
# Preview the first five rows of the training metadata again.
df.head(5)

eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
i64,i64,f64,i64,i64,f64,i64,i64,str,i64,i64,i64,i64,i64,i64
1628180742,0,0.0,353733,0,0.0,127492639,42516,"""Seizure""",3,0,0,0,0,0
1628180742,1,6.0,353733,1,6.0,3887563113,42516,"""Seizure""",3,0,0,0,0,0
1628180742,2,8.0,353733,2,8.0,1142670488,42516,"""Seizure""",3,0,0,0,0,0
1628180742,3,18.0,353733,3,18.0,2718991173,42516,"""Seizure""",3,0,0,0,0,0
1628180742,4,24.0,353733,4,24.0,3080632009,42516,"""Seizure""",3,0,0,0,0,0


In [22]:
# plotly.express is already imported as px in the notebook's first cell, so it
# is not re-imported here.
# Keep every 10th row of the EEG frame before converting to pandas, then draw
# all columns as lines in one figure.
eeg_pd = eeg_df[::10].to_pandas()
px.line(eeg_pd)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [34]:
from src.dataset import HMSTrainEEGData

ModuleNotFoundError: No module named 'src'