## EfficientNetB0 - infer

GitHub 経由で Kaggle Notebook に import して使う

- https://www.kaggle.com/code/cdeotte/efficientnetb0-starter-lb-0-43

ローカルで動かす際は `$ cp output/*.h5 input/hms-efficientnet-b0/`

### Configuration

In [1]:
# Dynamic module reloading: edits to imported source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys, gc

import numpy as np
import polars as pl
import pandas as pd

import tensorflow as tf
import matplotlib.pyplot as plt

print("TensorFlow version =", tf.__version__)

2024-03-02 23:27:40.686125: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-02 23:27:40.707095: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow version = 2.12.0


In [3]:
# USE MULTIPLE GPUS
# Mirror across every visible GPU when there is more than one;
# otherwise pin everything to the single device "/gpu:0".
gpus = tf.config.list_physical_devices("GPU")
if len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy()
    print(f"Using {len(gpus)} GPUs")
else:
    # NOTE(review): this branch also runs when no GPU is visible and still
    # targets "/gpu:0" — confirm that is intended.
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    print(f"Using {len(gpus)} GPU")

Using 1 GPU


2024-03-02 23:27:41.829551: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-02 23:27:41.846695: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-02 23:27:41.846781: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [4]:
# FOR Kaggle
# sys.path.append('/kaggle/input/d/komekami/hms-harmful-brain-activity-classification')

In [5]:
from src.cfg.v1 import CFG

In [6]:
# Automatic mixed precision, switched by CFG.MIX.
# https://cocoinit23.com/tensowflow-automatic-mixed-precision/
if not CFG.MIX:
    print("Using full precision")
else:
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
    print("Mixed precision enabled")

Mixed precision enabled


### Load test csv

In [7]:
# Get the label column names: the last six columns of train.csv.
# The frame is read inline so only the names stay bound afterwards.
label_columns = pl.read_csv(os.path.join(CFG.BASE_PATH, "train.csv")).columns[-6:]

gc.collect()

43

In [8]:
# Read test.csv (columns: spectrogram_id, eeg_id, patient_id) as a polars
# DataFrame, report its shape and preview the first rows.
test_df = pl.read_csv(os.path.join(CFG.BASE_PATH, "test.csv"))

print("Test shape: ", test_df.shape)
test_df.head()

Test shape:  (1, 3)


spectrogram_id,eeg_id,patient_id
i64,i64,i64
853520,3911565283,6885


### Load test spectrograms

In [9]:
%%time
files = os.listdir(CFG.SPEC_FILE_PATH_INFER)
print(f"There are {len(files)} spectrogram parquets")

specs = {}
for i, v in enumerate(files):
  tmp_df = pl.read_parquet(os.path.join(CFG.SPEC_FILE_PATH_INFER, v))
  name = int(v.split('.')[0])

  specs[name] = tmp_df.drop("time").to_numpy()

print(specs)

There are 1 spectrogram parquets
{853520: array([[14.91, 17.11, 11.66, ...,  0.05,  0.04,  0.05],
       [11.13, 10.95, 10.77, ...,  0.03,  0.03,  0.02],
       [10.88, 10.57,  8.79, ...,  0.05,  0.06,  0.06],
       ...,
       [ 9.61, 13.32,  9.19, ...,  0.39,  0.56,  0.29],
       [ 8.43, 11.84, 13.64, ...,  0.45,  0.45,  0.34],
       [12.33, 11.84,  9.42, ...,  0.46,  0.54,  0.29]], dtype=float32)}
CPU times: user 7.66 ms, sys: 4.67 ms, total: 12.3 ms
Wall time: 5.48 ms


### Load test eeg
- raw eeg dataをspectrogramへ変換

In [10]:
import pywt, librosa

# Wavelet name handed to denoise() inside spectrogram_from_eeg;
# a falsy value (None) skips the denoising step entirely.
USE_WAVELET = None

# One name per electrode chain in FEATS, in the same order; also the order of
# the channels in the image returned by spectrogram_from_eeg.
# NOTE(review): presumably left/right lateral and parasagittal chains — confirm.
NAMES = ["LL", "LP", "RP", "RR"]
# EEG column names of each chain. spectrogram_from_eeg subtracts consecutive
# columns, so every 5-electrode chain yields 4 difference signals.
FEATS = [
    ["Fp1", "F7", "T3", "T5", "O1"],
    ["Fp1", "F3", "C3", "P3", "O1"],
    ["Fp2", "F8", "T4", "T6", "O2"],
    ["Fp2", "F4", "C4", "P4", "O2"],
]

def spectrogram_from_eeg(parquet_path, display=False):
    """Build a 4-channel log-mel spectrogram image from one raw EEG parquet file.

    Only the middle 10,000 rows of the recording are used. For each of the
    four electrode chains in ``FEATS``, the four differences between
    consecutive electrodes are converted to log-mel spectrograms and averaged
    into one image channel.

    Args:
        parquet_path: Path to the EEG parquet file. It must contain every
            column named in ``FEATS``. The file stem is used as the EEG id
            in plot titles.
        display: If truthy, plot the four spectrogram channels and some of
            the difference signals with matplotlib.

    Returns:
        np.ndarray of shape (128, 256, 4) and dtype float32; channel ``k``
        belongs to chain ``NAMES[k]``.
    """
    # Take the id for the plot titles from the file being processed instead of
    # relying on a loop variable that happens to exist in the notebook globals.
    eeg_id = os.path.splitext(os.path.basename(parquet_path))[0]

    # Load middle 50sec of EEG series (10,000 rows at the sr=200 used below)
    eeg_df = pl.read_parquet(parquet_path).to_pandas()
    middle = (len(eeg_df) - 10_000) // 2
    eeg_df = eeg_df.iloc[middle:middle+10_000]

    # variable to hold spectrogram
    img = np.zeros((128, 256, 4), dtype="float32")

    if display:
        plt.figure(figsize=(10, 7))

    signals = []
    for k in range(4):
        COLS = FEATS[k]

        for kk in range(4):
            # compute pair differences
            x = eeg_df[COLS[kk]].values - eeg_df[COLS[kk+1]].values

            # fill nan: an all-NaN signal becomes zeros, otherwise NaNs are
            # replaced by the mean of the valid samples. The mean is only
            # computed when it exists, which avoids numpy's all-NaN warning.
            if np.isnan(x).all():
                x[:] = 0
            else:
                x = np.nan_to_num(x, nan=np.nanmean(x))

            # denoise
            if USE_WAVELET:
                x = denoise(x, wavelet=USE_WAVELET)

            signals.append(x)

            # Raw spectrogram; the hop length is chosen so that the 10,000
            # samples yield (just over) 256 time frames.
            mel_spec = librosa.feature.melspectrogram(
                y=x,
                sr=200,
                hop_length=len(x)//256,
                n_fft=1024,
                n_mels=128,
                fmin=0,
                fmax=20,
                win_length=128,
            )

            # Log transform, trimming the time axis to a multiple of 32
            width = (mel_spec.shape[1] // 32) * 32
            mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max).astype(np.float32)[:, :width]

            # Standardize to -1 to 1
            mel_spec_db = (mel_spec_db+40)/40
            img[:,:,k] += mel_spec_db

        # Average the 4 montage differences
        img[:,:,k] /= 4.0

        if display:
            plt.subplot(2, 2, k+1)
            plt.imshow(img[:,:,k], aspect="auto", origin="lower")
            plt.title(f"EEG {eeg_id} - Spectrogram {NAMES[k]}")

    if display:
        plt.show()
        plt.figure(figsize=(10,5))
        offset = 0
        for k in range(4):
            # Only the offset adjustment is skipped for the first trace;
            # every trace, including the first one, must be drawn.
            if k > 0:
                offset -= signals[3-k].min()
            plt.plot(range(10_000), signals[k]+offset, label=NAMES[3-k])
            offset += signals[3-k].max()
            # NOTE(review): `signals` holds 16 difference signals (4 per
            # chain), so signals[0..3] all come from the first chain while
            # the labels walk through NAMES in reverse — confirm which
            # signals/labels are meant to be shown here.

        plt.legend()
        plt.title(f"EEG {eeg_id}  signals")
        plt.show()

    return img


def denoise(x, wavelet="haar", level=1):
    """Wavelet-denoise a 1-D signal by hard-thresholding its detail coefficients.

    Args:
        x: Input signal.
        wavelet: Wavelet name forwarded to ``pywt``.
        level: Counted from the end of the coefficient list; selects the
            detail band used for the noise estimate.

    Returns:
        The signal reconstructed from the thresholded coefficients.
    """
    coeffs = pywt.wavedec(x, wavelet, mode="per")

    # Noise scale estimated from the deviation of the chosen detail band.
    sigma = (1/0.6745) * maddest(coeffs[-level])
    # Threshold grows with the signal length: sigma * sqrt(2 * ln(n)).
    uthresh = sigma * np.sqrt(2*np.log(len(x)))

    # The approximation coefficients (index 0) stay untouched.
    coeffs[1:] = [
        pywt.threshold(detail, value=uthresh, mode="hard")
        for detail in coeffs[1:]
    ]

    return pywt.waverec(coeffs, wavelet, mode="per")

def maddest(d, axis=None):
    """Return the mean absolute deviation of ``d`` from its mean along ``axis``."""
    deviations = np.absolute(d - np.mean(d, axis))
    return np.mean(deviations, axis)


In [11]:
%%time
# Plots are shown only for the first DISPLAY EEG ids of the loop below.
DISPLAY = 1
# Spectrogram image per eeg_id, computed from the raw EEG parquet files.
eegs = {}
for i, v in enumerate(test_df["eeg_id"].unique()):
  # `display` is True only while the running index is below DISPLAY.
  img = spectrogram_from_eeg(parquet_path=os.path.join(CFG.EEG_FILE_PATH_INFER, f"{v}.parquet"), display=i<DISPLAY)

  eegs[v] = img

ImportError: Numba needs NumPy 1.24 or less

<Figure size 1000x700 with 0 Axes>

### DataLoader

In [12]:
# Convert to pandas once: later cells access test_df pandas-style (`.values`).
# The guard makes re-running this cell a no-op instead of an AttributeError
# on the already-converted frame.
if isinstance(test_df, pl.DataFrame):
    test_df = test_df.to_pandas()

In [13]:
! pip install -U albumentations



In [14]:
from src.loaders.dataloader import DataLoader

# Test-mode loader over the rows of test_df, fed with both spectrogram
# sources: `specs` (keyed by spectrogram id) and `eegs` (keyed by eeg_id).
# NOTE(review): shuffle=False presumably keeps batches in test_df row order,
# which the submission cell relies on — confirm against DataLoader.
test_loader = DataLoader(
  df=test_df,
  specs=specs,
  eegs=eegs,
  label_columns=label_columns,
  batch_size=32,
  shuffle=False,
  mode="test",
)

In [15]:
# Smoke-test the loader by fetching its first batch.
test_loader[0]

KeyError: 3911565283

In [None]:
# FOR Kaggle
# !pip install --no-index --find-links=/kaggle/input/tf-efficientnet-whl-files /kaggle/input/tf-efficientnet-whl-files/efficientnet-1.1.1-py3-none-any.whl

In [None]:
from src.models.efficientnet_b0 import EfficientNetB0

# One model instance is reused; each fold only swaps in its own weights.
model = EfficientNetB0.build_model()

preds = []
for i in range(5):
    print(f"Fold {i+1}")

    # Weights sit in the working directory unless CFG.LOAD_MODELS_FROM_INFER
    # names a directory to load them from.
    weights_file = f"EfficientNet_v{CFG.VER}_f{i}.h5"
    if CFG.LOAD_MODELS_FROM_INFER != "":
        weights_file = f"{CFG.LOAD_MODELS_FROM_INFER}/{weights_file}"
    model.load_weights(weights_file)

    preds.append(model.predict(test_loader, verbose=1))

# Ensemble: average the per-fold predictions.
pred = np.mean(preds, axis=0)

print("Test preds shape", pred.shape)


Fold 1


2024-03-02 04:55:39.500227: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2024-03-02 04:55:39.831661: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:2254] Converted 327/1050 nodes to float16 precision using 18 cast(s) to float16 (excluding Const and Variable casts)
2024-03-02 04:55:40.007912: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8900


Fold 2
Fold 3
Fold 4


2024-03-02 04:55:40.292579: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2024-03-02 04:55:40.383900: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2024-03-02 04:55:40.476961: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Fold 5
Test preds shape (1, 6)


2024-03-02 04:55:40.566508: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


In [None]:
# Assemble the submission: eeg_id first, then one prediction column per label.
sub = pd.DataFrame(pred, columns=label_columns)
sub.insert(0, "eeg_id", test_df["eeg_id"].values)
sub.to_csv("submission.csv", index=False)

print("Submission shape", sub.shape)
sub.head()

Submission shape (1, 7)


Unnamed: 0,eeg_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,3911565283,0.168616,0.029283,0.000258,0.455246,0.018882,0.327716


In [None]:
# Sanity check: the six label predictions of every row should sum to one.
sub[label_columns].sum(axis=1)

0    1.0
dtype: float32