## 全ての差分特徴量を作る場合

In [None]:
# Load one raw EEG recording (file 568657.parquet) into a polars DataFrame.
df_eeg = pl.read_parquet(source="../input/train_eegs/568657.parquet")

In [None]:
# Preview the first five rows of the raw recording.
df_eeg.head(5)

Fp1,F3,C3,P3,F7,T3,T5,O1,Fz,Cz,Pz,Fp2,F4,C4,P4,F8,T4,T6,O2,EKG
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
-75.360001,86.379997,65.470001,62.580002,170.350006,92.370003,39.68,89.870003,18.26,-28.440001,-15.19,16.93,13.81,-42.16,78.099998,-143.649994,121.239998,-1.47,72.550003,-3090.090088
-107.739998,53.139999,32.59,21.950001,140.5,59.07,4.26,53.310001,-18.49,-63.27,-52.110001,-13.23,-16.42,-80.43,64.449997,-168.5,88.400002,-28.52,40.25,5560.439941
-103.050003,46.330002,10.75,9.15,137.820007,50.950001,-3.5,45.07,-20.879999,-66.629997,-69.75,-9.2,-21.4,-93.089996,5.12,-176.089996,51.689999,-38.639999,31.82,-4161.450195
-75.099998,77.870003,40.150002,31.790001,166.630005,76.769997,17.99,67.650002,11.14,-32.950001,-38.509998,14.77,9.79,-66.480003,48.220001,-147.990005,86.629997,-18.639999,62.509998,31769.970703
-97.919998,58.209999,27.33,5.34,144.080002,47.200001,-10.43,40.25,-12.35,-52.5,-54.919998,-10.07,-8.04,-90.82,67.269997,-164.710007,80.68,-36.52,41.830002,6879.720215


In [None]:
# Build the time key used to line this EEG up with the rows of the train table.
sampling_rate_hz = 200  # the EEG is sampled at 200 Hz, so consecutive rows are 1/200 s apart
t = 1 / sampling_rate_hz  # sampling period in seconds (kept for later cells that may read it)

# Divide the row index by the rate instead of multiplying by the rounded period `t`:
# `i / 200` is the correctly rounded timestamp (whole seconds come out exact), while
# `i * 0.005` can be off in the last bit. That matters because `offset_seconds` is
# later used as a float-equality join key.
seconds = [i / sampling_rate_hz for i in range(len(df_eeg))]

# Seconds elapsed since the first sample of the recording.
df_eeg = df_eeg.with_columns(pl.Series(seconds).alias("seconds"))

# Shift by 25 s. Per the original author's note, df_train stores, for a sampled
# window [T-25, T+25], the window start T-25 as its offset.
df_eeg = df_eeg.with_columns((pl.col("seconds") - 25).alias("offset_seconds"))

In [None]:
# Electrode-pair difference features: (output column, minuend, subtrahend).
# The prefixes LL / LP / RP / RR group the pairs into four chains of electrodes.
montage_pairs = [
    ("LL_Fp1-F7", "Fp1", "F7"),
    ("LL_F7-T3", "F7", "T3"),
    ("LL_T3-T5", "T3", "T5"),
    ("LL_T5-O1", "T5", "O1"),
    # NOTE(review): this column is Fp1 - F3 although its name says "Fp1-F7".
    # The name is kept because later cells list the column under this exact name.
    ("LP_Fp1-F7", "Fp1", "F3"),
    ("LP_F3-C3", "F3", "C3"),
    ("LP_C3-P3", "C3", "P3"),
    ("LP_P3-O1", "P3", "O1"),
    ("RP_Fp2-F4", "Fp2", "F4"),
    ("RP_F4-C4", "F4", "C4"),
    ("RP_C4-P4", "C4", "P4"),
    ("RP_P4-O2", "P4", "O2"),
    ("RR_Fp2-F8", "Fp2", "F8"),
    ("RR_F8-T4", "F8", "T4"),
    ("RR_T4-T6", "T4", "T6"),
    ("RR_T6-O2", "T6", "O2"),
]

# Create the difference features.
df_eeg = df_eeg.with_columns(
    [(pl.col(a) - pl.col(b)).alias(name) for name, a, b in montage_pairs]
)

# Rolling aggregates of every difference feature.
window = 4000  # 20-second window: 20 s * 200 rows per second = 4000 rows

rolling_exprs = []
for name, _, _ in montage_pairs:
    diff = pl.col(name)
    rolling_exprs += [
        diff.rolling_mean(window_size=window).alias(f"mean20s_{name}"),
        # Fix: the min/max aliases used to be swapped (rolling_max was stored as
        # "min20s_*" and rolling_min as "max20s_*"). Column names and their order
        # are unchanged; only the values now match the names.
        diff.rolling_min(window_size=window).alias(f"min20s_{name}"),
        diff.rolling_max(window_size=window).alias(f"max20s_{name}"),
        diff.rolling_std(window_size=window).alias(f"std20s_{name}"),
        # Peak-to-peak range inside the window.
        (
            diff.rolling_max(window_size=window)
            - diff.rolling_min(window_size=window)
        ).alias(f"max-min_20s_{name}"),
    ]

df_eeg = df_eeg.with_columns(rolling_exprs)

In [None]:
# Preview the first five rows after the feature columns have been added.
df_eeg.head(5)

Fp1,F3,C3,P3,F7,T3,T5,O1,Fz,Cz,Pz,Fp2,F4,C4,P4,F8,T4,T6,O2,EKG,seconds,offset_seconds,LL_Fp1-F7,LL_F7-T3,LL_T3-T5,LL_T5-O1,LP_Fp1-F7,LP_F3-C3,LP_C3-P3,LP_P3-O1,RP_Fp2-F4,RP_F4-C4,RP_C4-P4,RP_P4-O2,RR_Fp2-F8,RR_F8-T4,RR_T4-T6,…,std20s_RP_Fp2-F4,max-min_20s_RP_Fp2-F4,mean20s_RP_F4-C4,min20s_RP_F4-C4,max20s_RP_F4-C4,std20s_RP_F4-C4,max-min_20s_RP_F4-C4,mean20s_RP_C4-P4,min20s_RP_C4-P4,max20s_RP_C4-P4,std20s_RP_C4-P4,max-min_20s_RP_C4-P4,mean20s_RP_P4-O2,min20s_RP_P4-O2,max20s_RP_P4-O2,std20s_RP_P4-O2,max-min_20s_RP_P4-O2,mean20s_RR_Fp2-F8,min20s_RR_Fp2-F8,max20s_RR_Fp2-F8,std20s_RR_Fp2-F8,max-min_20s_RR_Fp2-F8,mean20s_RR_F8-T4,min20s_RR_F8-T4,max20s_RR_F8-T4,std20s_RR_F8-T4,max-min_20s_RR_F8-T4,mean20s_RR_T4-T6,min20s_RR_T4-T6,max20s_RR_T4-T6,std20s_RR_T4-T6,max-min_20s_RR_T4-T6,mean20s_RR_T6-O2,min20s_RR_T6-O2,max20s_RR_T6-O2,std20s_RR_T6-O2,max-min_20s_RR_T6-O2
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
-75.360001,86.379997,65.470001,62.580002,170.350006,92.370003,39.68,89.870003,18.26,-28.440001,-15.19,16.93,13.81,-42.16,78.099998,-143.649994,121.239998,-1.47,72.550003,-3090.090088,0.0,-25.0,-245.710007,77.980003,52.690002,-50.190002,-161.73999,20.909996,2.889999,-27.290001,3.12,55.970001,-120.259995,5.549995,160.579987,-264.889984,122.709999,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-107.739998,53.139999,32.59,21.950001,140.5,59.07,4.26,53.310001,-18.49,-63.27,-52.110001,-13.23,-16.42,-80.43,64.449997,-168.5,88.400002,-28.52,40.25,5560.439941,0.005,-24.995,-248.23999,81.43,54.809998,-49.050003,-160.880005,20.549999,10.639999,-31.360001,3.190001,64.010002,-144.880005,24.199997,155.270004,-256.899994,116.919998,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-103.050003,46.330002,10.75,9.15,137.820007,50.950001,-3.5,45.07,-20.879999,-66.629997,-69.75,-9.2,-21.4,-93.089996,5.12,-176.089996,51.689999,-38.639999,31.82,-4161.450195,0.01,-24.99,-240.87001,86.87001,54.450001,-48.57,-149.380005,35.580002,1.6,-35.919998,12.2,71.689995,-98.209999,-26.700001,166.889999,-227.779999,90.330002,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-75.099998,77.870003,40.150002,31.790001,166.630005,76.769997,17.99,67.650002,11.14,-32.950001,-38.509998,14.77,9.79,-66.480003,48.220001,-147.990005,86.629997,-18.639999,62.509998,31769.970703,0.015,-24.985,-241.730011,89.860008,58.779999,-49.660004,-152.970001,37.720001,8.360001,-35.860001,4.98,76.270004,-114.700005,-14.289997,162.76001,-234.619995,105.269997,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-97.919998,58.209999,27.33,5.34,144.080002,47.200001,-10.43,40.25,-12.35,-52.5,-54.919998,-10.07,-8.04,-90.82,67.269997,-164.710007,80.68,-36.52,41.830002,6879.720215,0.02,-24.98,-242.0,96.880005,57.630001,-50.68,-156.130005,30.879999,21.99,-34.91,-2.03,82.779999,-158.089996,25.439995,154.640015,-245.390015,117.199997,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
# Drop the raw channels and the absolute time column, leaving the join key
# (offset_seconds) and the derived feature columns.
df_eeg_diff = df_eeg.drop(
    [
        "Fp1", "F3", "C3", "P3",
        "F7", "T3", "T5", "O1",
        "Fz", "Cz", "Pz",
        "Fp2", "F4", "C4", "P4",
        "F8", "T4", "T6", "O2",
    ]
    + ["EKG", "seconds"]
)
df_eeg_diff.head(5)

offset_seconds,LL_Fp1-F7,LL_F7-T3,LL_T3-T5,LL_T5-O1,LP_Fp1-F7,LP_F3-C3,LP_C3-P3,LP_P3-O1,RP_Fp2-F4,RP_F4-C4,RP_C4-P4,RP_P4-O2,RR_Fp2-F8,RR_F8-T4,RR_T4-T6,RR_T6-O2,mean20s_LL_Fp1-F7,min20s_LL_Fp1-F7,max20s_LL_Fp1-F7,std20s_LL_Fp1-F7,max-min_20s_LL_Fp1-F7,mean20s_LL_F7-T3,min20s_LL_F7-T3,max20s_LL_F7-T3,std20s_LL_F7-T3,max-min_20s_LL_F7-T3,mean20s_LL_T3-T5,min20s_LL_T3-T5,max20s_LL_T3-T5,std20s_LL_T3-T5,max-min_20s_LL_T3-T5,mean20s_LL_T5-O1,min20s_LL_T5-O1,max20s_LL_T5-O1,std20s_LL_T5-O1,max-min_20s_LL_T5-O1,…,std20s_RP_Fp2-F4,max-min_20s_RP_Fp2-F4,mean20s_RP_F4-C4,min20s_RP_F4-C4,max20s_RP_F4-C4,std20s_RP_F4-C4,max-min_20s_RP_F4-C4,mean20s_RP_C4-P4,min20s_RP_C4-P4,max20s_RP_C4-P4,std20s_RP_C4-P4,max-min_20s_RP_C4-P4,mean20s_RP_P4-O2,min20s_RP_P4-O2,max20s_RP_P4-O2,std20s_RP_P4-O2,max-min_20s_RP_P4-O2,mean20s_RR_Fp2-F8,min20s_RR_Fp2-F8,max20s_RR_Fp2-F8,std20s_RR_Fp2-F8,max-min_20s_RR_Fp2-F8,mean20s_RR_F8-T4,min20s_RR_F8-T4,max20s_RR_F8-T4,std20s_RR_F8-T4,max-min_20s_RR_F8-T4,mean20s_RR_T4-T6,min20s_RR_T4-T6,max20s_RR_T4-T6,std20s_RR_T4-T6,max-min_20s_RR_T4-T6,mean20s_RR_T6-O2,min20s_RR_T6-O2,max20s_RR_T6-O2,std20s_RR_T6-O2,max-min_20s_RR_T6-O2
f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
-25.0,-245.710007,77.980003,52.690002,-50.190002,-161.73999,20.909996,2.889999,-27.290001,3.12,55.970001,-120.259995,5.549995,160.579987,-264.889984,122.709999,-74.020004,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-24.995,-248.23999,81.43,54.809998,-49.050003,-160.880005,20.549999,10.639999,-31.360001,3.190001,64.010002,-144.880005,24.199997,155.270004,-256.899994,116.919998,-68.770004,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-24.99,-240.87001,86.87001,54.450001,-48.57,-149.380005,35.580002,1.6,-35.919998,12.2,71.689995,-98.209999,-26.700001,166.889999,-227.779999,90.330002,-70.459999,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-24.985,-241.730011,89.860008,58.779999,-49.660004,-152.970001,37.720001,8.360001,-35.860001,4.98,76.270004,-114.700005,-14.289997,162.76001,-234.619995,105.269997,-81.149994,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-24.98,-242.0,96.880005,57.630001,-50.68,-156.130005,30.879999,21.99,-34.91,-2.03,82.779999,-158.089996,25.439995,154.640015,-245.390015,117.199997,-78.350006,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
# List the column names that remain after dropping the raw channels.
df_eeg_diff.columns

['offset_seconds',
 'LL_Fp1-F7',
 'LL_F7-T3',
 'LL_T3-T5',
 'LL_T5-O1',
 'LP_Fp1-F7',
 'LP_F3-C3',
 'LP_C3-P3',
 'LP_P3-O1',
 'RP_Fp2-F4',
 'RP_F4-C4',
 'RP_C4-P4',
 'RP_P4-O2',
 'RR_Fp2-F8',
 'RR_F8-T4',
 'RR_T4-T6',
 'RR_T6-O2',
 'mean20s_LL_Fp1-F7',
 'min20s_LL_Fp1-F7',
 'max20s_LL_Fp1-F7',
 'std20s_LL_Fp1-F7',
 'max-min_20s_LL_Fp1-F7',
 'mean20s_LL_F7-T3',
 'min20s_LL_F7-T3',
 'max20s_LL_F7-T3',
 'std20s_LL_F7-T3',
 'max-min_20s_LL_F7-T3',
 'mean20s_LL_T3-T5',
 'min20s_LL_T3-T5',
 'max20s_LL_T3-T5',
 'std20s_LL_T3-T5',
 'max-min_20s_LL_T3-T5',
 'mean20s_LL_T5-O1',
 'min20s_LL_T5-O1',
 'max20s_LL_T5-O1',
 'std20s_LL_T5-O1',
 'max-min_20s_LL_T5-O1',
 'mean20s_LP_Fp1-F7',
 'min20s_LP_Fp1-F7',
 'max20s_LP_Fp1-F7',
 'std20s_LP_Fp1-F7',
 'max-min_20s_LP_Fp1-F7',
 'mean20s_LP_F3-C3',
 'min20s_LP_F3-C3',
 'max20s_LP_F3-C3',
 'std20s_LP_F3-C3',
 'max-min_20s_LP_F3-C3',
 'mean20s_LP_C3-P3',
 'min20s_LP_C3-P3',
 'max20s_LP_C3-P3',
 'std20s_LP_C3-P3',
 'max-min_20s_LP_C3-P3',
 'mean20s_LP_P3-O

In [None]:
# Attach the EEG features to the train rows.
# df_eeg_diff was built from the single file 568657.parquet, so the join must
# match on eeg_id as well as on the offset. Joining on the offset alone copied
# this recording's features onto every train row that shared an offset value,
# whatever its eeg_id. With the left join, rows of other recordings are kept
# and get null features.
df_train_eeg = df_train.join(
    df_eeg_diff.with_columns(pl.lit(568657, dtype=pl.Int64).alias("eeg_id")),
    left_on=["eeg_id", "eeg_label_offset_seconds"],
    right_on=["eeg_id", "offset_seconds"],
    how="left",
)
df_train_eeg.head()

eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,LL_Fp1-F7,LL_F7-T3,LL_T3-T5,LL_T5-O1,LP_Fp1-F7,LP_F3-C3,LP_C3-P3,LP_P3-O1,RP_Fp2-F4,RP_F4-C4,RP_C4-P4,RP_P4-O2,RR_Fp2-F8,RR_F8-T4,RR_T4-T6,RR_T6-O2,mean20s_LL_Fp1-F7,min20s_LL_Fp1-F7,max20s_LL_Fp1-F7,std20s_LL_Fp1-F7,max-min_20s_LL_Fp1-F7,mean20s_LL_F7-T3,…,std20s_RP_Fp2-F4,max-min_20s_RP_Fp2-F4,mean20s_RP_F4-C4,min20s_RP_F4-C4,max20s_RP_F4-C4,std20s_RP_F4-C4,max-min_20s_RP_F4-C4,mean20s_RP_C4-P4,min20s_RP_C4-P4,max20s_RP_C4-P4,std20s_RP_C4-P4,max-min_20s_RP_C4-P4,mean20s_RP_P4-O2,min20s_RP_P4-O2,max20s_RP_P4-O2,std20s_RP_P4-O2,max-min_20s_RP_P4-O2,mean20s_RR_Fp2-F8,min20s_RR_Fp2-F8,max20s_RR_Fp2-F8,std20s_RR_Fp2-F8,max-min_20s_RR_Fp2-F8,mean20s_RR_F8-T4,min20s_RR_F8-T4,max20s_RR_F8-T4,std20s_RR_F8-T4,max-min_20s_RR_F8-T4,mean20s_RR_T4-T6,min20s_RR_T4-T6,max20s_RR_T4-T6,std20s_RR_T4-T6,max-min_20s_RR_T4-T6,mean20s_RR_T6-O2,min20s_RR_T6-O2,max20s_RR_T6-O2,std20s_RR_T6-O2,max-min_20s_RR_T6-O2
i64,i64,f64,i64,i64,f64,i64,i64,str,i64,i64,i64,i64,i64,i64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
1628180742,0,0.0,353733,0,0.0,127492639,42516,"""Seizure""",3,0,0,0,0,0,62.989998,-205.699997,66.119995,-14.259998,29.190001,18.420002,-163.800003,25.340004,76.720001,-115.040001,-15.279999,-47.080002,59.02,-178.12001,-17.119995,35.540001,61.780102,342.559967,-152.279999,103.107872,494.839966,-107.433456,…,76.289032,637.630005,27.533443,812.419983,-323.079987,90.222954,1135.5,-15.082874,455.910004,-297.550018,143.690292,753.460022,-24.17841,341.130005,-405.639954,132.450363,746.769958,129.213898,268.970032,-63.970001,41.671181,332.940033,-126.269608,215.639984,-453.079987,125.583855,668.719971,17.916061,579.700012,-290.25,139.945419,869.950012,-11.638247,193.159988,-258.160004,67.333481,451.320007
1628180742,1,6.0,353733,1,6.0,3887563113,42516,"""Seizure""",3,0,0,0,0,0,18.790001,22.420006,18.009995,-54.669998,-56.449997,15.48,42.100002,3.419998,20.25,48.109997,-116.610001,65.709999,147.62001,-159.37001,41.52,-12.309999,39.797653,342.559967,-182.869995,110.716377,525.429932,-81.776917,…,81.877686,637.630005,30.455515,812.419983,-323.079987,92.761009,1135.5,-27.662664,455.910004,-347.089996,151.25528,803.0,-13.407331,341.130005,-405.639954,139.365265,746.769958,111.805649,268.970032,-63.970001,41.21344,332.940033,-119.875412,215.639984,-453.079987,127.47892,668.719971,29.192438,579.700012,-290.25,150.357468,869.950012,-14.071545,193.159988,-181.0,63.040646,374.159973
1628180742,2,8.0,353733,2,8.0,1142670488,42516,"""Seizure""",3,0,0,0,0,0,-6.300003,-74.009995,80.209991,-39.549995,-26.660004,-36.34,-28.719999,52.07,44.290001,-35.300003,144.320007,-97.93,127.470001,1.479996,-42.099998,-31.470001,41.47686,342.559967,-182.869995,110.118675,525.429932,-80.255226,…,81.641479,637.630005,24.875502,812.419983,-323.079987,93.224258,1135.5,-16.09198,455.910004,-347.089996,150.710129,803.0,-24.066389,341.130005,-405.639954,136.667404,746.769958,109.990799,268.970032,-63.970001,39.854912,332.940033,-105.59288,215.639984,-453.079987,125.102272,668.719971,19.007767,579.700012,-290.25,151.30069,869.950012,-14.955832,193.159988,-181.0,67.201035,374.159973
1628180742,3,18.0,353733,3,18.0,2718991173,42516,"""Seizure""",3,0,0,0,0,0,-10.73,-45.810001,36.48,-104.019989,10.18,36.189999,-85.759995,-84.689995,12.559998,101.57,-4.780001,-147.320007,64.790001,-45.590004,104.620003,-161.790009,-18.730085,204.25,-199.720001,74.421288,403.970001,-28.55604,…,78.258766,518.650024,38.985313,321.429993,-222.270004,88.075211,543.700012,-66.258827,298.919983,-393.550018,114.358383,692.469971,9.059397,297.630005,-274.23999,100.09063,571.869995,86.55188,228.880005,-40.589996,38.411232,269.470001,-121.632492,134.029999,-384.849976,114.455879,518.880005,49.830673,335.540009,-249.339996,125.330376,584.880005,-32.691612,180.470001,-192.939987,66.267426,373.409973
1628180742,4,24.0,353733,4,24.0,3080632009,42516,"""Seizure""",3,0,0,0,0,0,13.260002,-69.980003,31.790001,3.02,-26.689999,-6.0,-20.510002,31.290001,-14.259999,107.970001,16.75,-57.339996,56.419998,-81.339996,24.650002,53.390003,-6.375765,110.739998,-199.720001,54.895741,310.459991,-37.903515,…,64.223923,510.450012,39.210075,321.429993,-222.270004,78.86515,543.700012,-42.550224,298.919983,-393.550018,109.577835,692.469971,-18.170519,289.320007,-274.23999,92.941933,563.559998,76.416374,228.880005,-68.37999,40.918865,297.26001,-97.123856,124.150002,-384.849976,95.622894,508.999969,36.773731,335.540009,-249.339996,102.06546,584.880005,-33.698002,180.470001,-232.62001,78.276169,413.090027


In [None]:
# Show only the joined rows that belong to recording 568657.
is_target_eeg = pl.col("eeg_id") == 568657
df_train_eeg.filter(is_target_eeg)

eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,LL_Fp1-F7,LL_F7-T3,LL_T3-T5,LL_T5-O1,LP_Fp1-F7,LP_F3-C3,LP_C3-P3,LP_P3-O1,RP_Fp2-F4,RP_F4-C4,RP_C4-P4,RP_P4-O2,RR_Fp2-F8,RR_F8-T4,RR_T4-T6,RR_T6-O2,mean20s_LL_Fp1-F7,min20s_LL_Fp1-F7,max20s_LL_Fp1-F7,std20s_LL_Fp1-F7,max-min_20s_LL_Fp1-F7,mean20s_LL_F7-T3,…,std20s_RP_Fp2-F4,max-min_20s_RP_Fp2-F4,mean20s_RP_F4-C4,min20s_RP_F4-C4,max20s_RP_F4-C4,std20s_RP_F4-C4,max-min_20s_RP_F4-C4,mean20s_RP_C4-P4,min20s_RP_C4-P4,max20s_RP_C4-P4,std20s_RP_C4-P4,max-min_20s_RP_C4-P4,mean20s_RP_P4-O2,min20s_RP_P4-O2,max20s_RP_P4-O2,std20s_RP_P4-O2,max-min_20s_RP_P4-O2,mean20s_RR_Fp2-F8,min20s_RR_Fp2-F8,max20s_RR_Fp2-F8,std20s_RR_Fp2-F8,max-min_20s_RR_Fp2-F8,mean20s_RR_F8-T4,min20s_RR_F8-T4,max20s_RR_F8-T4,std20s_RR_F8-T4,max-min_20s_RR_F8-T4,mean20s_RR_T4-T6,min20s_RR_T4-T6,max20s_RR_T4-T6,std20s_RR_T4-T6,max-min_20s_RR_T4-T6,mean20s_RR_T6-O2,min20s_RR_T6-O2,max20s_RR_T6-O2,std20s_RR_T6-O2,max-min_20s_RR_T6-O2
i64,i64,f64,i64,i64,f64,i64,i64,str,i64,i64,i64,i64,i64,i64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
568657,0,0.0,789577333,0,0.0,1825637311,20654,"""Other""",0,0,3,0,2,7,62.989998,-205.699997,66.119995,-14.259998,29.190001,18.420002,-163.800003,25.340004,76.720001,-115.040001,-15.279999,-47.080002,59.02,-178.12001,-17.119995,35.540001,61.780102,342.559967,-152.279999,103.107872,494.839966,-107.433456,…,76.289032,637.630005,27.533443,812.419983,-323.079987,90.222954,1135.5,-15.082874,455.910004,-297.550018,143.690292,753.460022,-24.17841,341.130005,-405.639954,132.450363,746.769958,129.213898,268.970032,-63.970001,41.671181,332.940033,-126.269608,215.639984,-453.079987,125.583855,668.719971,17.916061,579.700012,-290.25,139.945419,869.950012,-11.638247,193.159988,-258.160004,67.333481,451.320007
568657,1,6.0,789577333,1,6.0,3640441665,20654,"""Other""",0,0,3,0,2,7,18.790001,22.420006,18.009995,-54.669998,-56.449997,15.48,42.100002,3.419998,20.25,48.109997,-116.610001,65.709999,147.62001,-159.37001,41.52,-12.309999,39.797653,342.559967,-182.869995,110.716377,525.429932,-81.776917,…,81.877686,637.630005,30.455515,812.419983,-323.079987,92.761009,1135.5,-27.662664,455.910004,-347.089996,151.25528,803.0,-13.407331,341.130005,-405.639954,139.365265,746.769958,111.805649,268.970032,-63.970001,41.21344,332.940033,-119.875412,215.639984,-453.079987,127.47892,668.719971,29.192438,579.700012,-290.25,150.357468,869.950012,-14.071545,193.159988,-181.0,63.040646,374.159973
568657,2,12.0,789577333,2,12.0,1364530340,20654,"""Other""",0,0,3,0,2,7,-158.819992,105.679993,49.869999,-90.209999,-13.04,-86.370003,3.080002,2.849998,10.49,44.980003,-189.450012,-8.009995,57.43,-25.73,-32.18,-141.509995,23.858727,342.559967,-191.240005,115.225914,533.799988,-64.08902,…,89.29274,637.630005,37.149303,812.419983,-323.079987,99.361046,1135.5,-23.568499,455.910004,-347.089996,158.408188,803.0,-10.346552,341.130005,-405.639954,141.061066,746.769958,100.200615,268.970032,-63.970001,39.702759,332.940033,-123.352112,215.639984,-453.079987,133.029526,668.719971,53.671261,579.700012,-280.609985,155.469055,860.309998,-20.593449,193.159988,-177.389999,65.572784,370.549988
568657,3,16.0,789577333,3,16.0,1874525225,20654,"""Other""",0,0,3,0,2,7,-33.91,-12.280001,7.309999,-5.059999,-30.740002,-119.849998,83.009995,23.640001,-68.460007,49.730003,-60.989998,98.360001,42.650002,-84.879997,61.419998,-0.550001,-19.452307,204.25,-199.720001,75.048492,403.970001,-28.927416,…,78.17485,443.779999,47.200874,470.019989,-222.270004,92.527679,692.289978,-70.259682,298.919983,-393.550018,113.714897,692.469971,16.200289,297.630005,-274.23999,99.762177,571.869995,91.467094,268.970032,-54.26001,37.38319,323.230042,-134.856674,134.029999,-384.849976,113.709175,518.880005,65.765511,579.700012,-249.339996,134.343781,829.040039,-33.018425,180.470001,-180.300003,64.838318,360.77002


In [None]:
# Columns to keep: the two keys plus, for each difference feature, its five
# 20-second rolling aggregates. The raw difference columns themselves are
# deliberately left out.
_diff_names = [
    'LL_Fp1-F7', 'LL_F7-T3', 'LL_T3-T5', 'LL_T5-O1',
    'LP_Fp1-F7', 'LP_F3-C3', 'LP_C3-P3', 'LP_P3-O1',
    'RP_Fp2-F4', 'RP_F4-C4', 'RP_C4-P4', 'RP_P4-O2',
    'RR_Fp2-F8', 'RR_F8-T4', 'RR_T4-T6', 'RR_T6-O2',
]
_agg_prefixes = ['mean20s_', 'min20s_', 'max20s_', 'std20s_', 'max-min_20s_']

collist = ['eeg_id', 'offset_seconds'] + [
    f'{prefix}{diff_name}'
    for diff_name in _diff_names
    for prefix in _agg_prefixes
]

In [None]:
# Empty frame with a fixed schema: the two key columns followed by the five
# 20-second rolling aggregates of each difference feature, all Float32.
# The raw difference columns are deliberately not part of the schema.
eeg_all_schema = {
    'eeg_id': pl.Int64,
    'offset_seconds': pl.Float64,
}
for diff_name in [
    'LL_Fp1-F7', 'LL_F7-T3', 'LL_T3-T5', 'LL_T5-O1',
    'LP_Fp1-F7', 'LP_F3-C3', 'LP_C3-P3', 'LP_P3-O1',
    'RP_Fp2-F4', 'RP_F4-C4', 'RP_C4-P4', 'RP_P4-O2',
    'RR_Fp2-F8', 'RR_F8-T4', 'RR_T4-T6', 'RR_T6-O2',
]:
    for prefix in ['mean20s_', 'min20s_', 'max20s_', 'std20s_', 'max-min_20s_']:
        eeg_all_schema[f'{prefix}{diff_name}'] = pl.Float32

df_eeg_all = pl.DataFrame(schema=eeg_all_schema)


In [None]:
%%time
PATH = '../input/train_eegs/'
# Keep only parquet files: the file stem is parsed as the integer eeg_id below,
# so any stray directory entry would otherwise crash the loop.
files = [f for f in os.listdir(PATH) if f.endswith('.parquet')]
print(f'There are {len(files)} eeg parquet files')

SAMPLING_HZ = 200   # EEG is sampled at 200 Hz, i.e. one row every 1/200 s
window = 4000       # 20-second window (20 s * 200 rows/s = 4000 rows)

# (output column, minuend electrode, subtrahend electrode) for each
# neighbouring-electrode difference feature.
# NOTE: "LP_Fp1-F7" actually holds Fp1 - F3. The name is kept as-is because the
# schema / `collist` defined earlier in the notebook refer to it.
MONTAGE = [
    ("LL_Fp1-F7", "Fp1", "F7"),
    ("LL_F7-T3", "F7", "T3"),
    ("LL_T3-T5", "T3", "T5"),
    ("LL_T5-O1", "T5", "O1"),
    ("LP_Fp1-F7", "Fp1", "F3"),
    ("LP_F3-C3", "F3", "C3"),
    ("LP_C3-P3", "C3", "P3"),
    ("LP_P3-O1", "P3", "O1"),
    ("RP_Fp2-F4", "Fp2", "F4"),
    ("RP_F4-C4", "F4", "C4"),
    ("RP_C4-P4", "C4", "P4"),
    ("RP_P4-O2", "P4", "O2"),
    ("RR_Fp2-F8", "Fp2", "F8"),
    ("RR_F8-T4", "F8", "T4"),
    ("RR_T4-T6", "T4", "T6"),
    ("RR_T6-O2", "T6", "O2"),
]

# Difference features: one expression per montage entry.
diff_exprs = [
    (pl.col(minuend) - pl.col(subtrahend)).alias(diff_name)
    for diff_name, minuend, subtrahend in MONTAGE
]

# Rolling aggregates of every difference feature over the 20 s window.
# The expressions do not depend on the file, so they are built once here.
# FIX: the original aliased rolling_max as "min20s_*" and rolling_min as
# "max20s_*"; each alias now matches the statistic it holds.
rolling_exprs = []
for diff_name, _, _ in MONTAGE:
    diff_col = pl.col(diff_name)
    roll_min = diff_col.rolling_min(window_size=window)
    roll_max = diff_col.rolling_max(window_size=window)
    rolling_exprs += [
        diff_col.rolling_mean(window_size=window).alias(f"mean20s_{diff_name}"),
        roll_min.alias(f"min20s_{diff_name}"),
        roll_max.alias(f"max20s_{diff_name}"),
        diff_col.rolling_std(window_size=window).alias(f"std20s_{diff_name}"),
        (roll_max - roll_min).alias(f"max-min_20s_{diff_name}"),
    ]

# Collect per-file frames and concatenate once, instead of re-concatenating the
# ever-growing frame on every iteration. The `finally` keeps whatever was
# processed so far if the (long) loop is interrupted.
frames = [df_eeg_all]
try:
    for i, f in enumerate(files):

        if i % 100 == 0:
            print(i, ', ', end='')
        df_eeg = pl.read_parquet(f'{PATH}{f}')

        # The eeg_id is the file stem.
        name = int(f.split('.')[0])

        # Time key used to align each EEG row with the train labels.
        # `row / SAMPLING_HZ` is correctly rounded, which is safer for a float
        # join key than multiplying by the inexact constant 1/200.
        seconds = [row / SAMPLING_HZ for row in range(len(df_eeg))]

        df_eeg = (
            df_eeg
            .with_columns(
                pl.lit(name, dtype=pl.Int64).alias("eeg_id"),
                # Seconds elapsed since the start of the recording.
                pl.Series("seconds", seconds, dtype=pl.Float64),
            )
            # df_train stores the start (T-25) of each sampled window
            # [T-25, T+25] as its offset, so shift the row time by 25 s.
            .with_columns((pl.col("seconds") - 25).alias("offset_seconds"))
            .with_columns(diff_exprs)
            .with_columns(rolling_exprs)
            .select(collist)
        )

        frames.append(df_eeg)
finally:
    df_eeg_all = pl.concat(frames, how='vertical')

There are 17300 eeg parquet files
0 , 100 , 200 , 300 , 400 , 500 , 600 , 700 , 800 , 900 , 1000 , 1100 , 1200 , 1300 , 1400 , 1500 , 

KeyboardInterrupt: 

In [None]:
# Attach the rolling EEG features to every label row of df_train.
# Rows are matched on the EEG id and on the label offset (df_train side)
# against the per-row offset computed for the EEG frames; a left join keeps
# every train row, leaving the feature columns null when no EEG row matches.
train_keys = ["eeg_id", "eeg_label_offset_seconds"]
eeg_keys = ["eeg_id", "offset_seconds"]
df_train_eeg = df_train.join(
    df_eeg_all,
    how="left",
    left_on=train_keys,
    right_on=eeg_keys,
)
df_train_eeg.head()

eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,mean20s_LL_Fp1-F7,min20s_LL_Fp1-F7,max20s_LL_Fp1-F7,std20s_LL_Fp1-F7,max-min_20s_LL_Fp1-F7,mean20s_LL_F7-T3,min20s_LL_F7-T3,max20s_LL_F7-T3,std20s_LL_F7-T3,max-min_20s_LL_F7-T3,mean20s_LL_T3-T5,min20s_LL_T3-T5,max20s_LL_T3-T5,std20s_LL_T3-T5,max-min_20s_LL_T3-T5,mean20s_LL_T5-O1,min20s_LL_T5-O1,max20s_LL_T5-O1,std20s_LL_T5-O1,max-min_20s_LL_T5-O1,mean20s_LP_Fp1-F7,min20s_LP_Fp1-F7,…,std20s_RP_Fp2-F4,max-min_20s_RP_Fp2-F4,mean20s_RP_F4-C4,min20s_RP_F4-C4,max20s_RP_F4-C4,std20s_RP_F4-C4,max-min_20s_RP_F4-C4,mean20s_RP_C4-P4,min20s_RP_C4-P4,max20s_RP_C4-P4,std20s_RP_C4-P4,max-min_20s_RP_C4-P4,mean20s_RP_P4-O2,min20s_RP_P4-O2,max20s_RP_P4-O2,std20s_RP_P4-O2,max-min_20s_RP_P4-O2,mean20s_RR_Fp2-F8,min20s_RR_Fp2-F8,max20s_RR_Fp2-F8,std20s_RR_Fp2-F8,max-min_20s_RR_Fp2-F8,mean20s_RR_F8-T4,min20s_RR_F8-T4,max20s_RR_F8-T4,std20s_RR_F8-T4,max-min_20s_RR_F8-T4,mean20s_RR_T4-T6,min20s_RR_T4-T6,max20s_RR_T4-T6,std20s_RR_T4-T6,max-min_20s_RR_T4-T6,mean20s_RR_T6-O2,min20s_RR_T6-O2,max20s_RR_T6-O2,std20s_RR_T6-O2,max-min_20s_RR_T6-O2
i64,i64,f64,i64,i64,f64,i64,i64,str,i64,i64,i64,i64,i64,i64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
1628180742,0,0.0,353733,0,0.0,127492639,42516,"""Seizure""",3,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1628180742,1,6.0,353733,1,6.0,3887563113,42516,"""Seizure""",3,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1628180742,2,8.0,353733,2,8.0,1142670488,42516,"""Seizure""",3,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1628180742,3,18.0,353733,3,18.0,2718991173,42516,"""Seizure""",3,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1628180742,4,24.0,353733,4,24.0,3080632009,42516,"""Seizure""",3,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
