## Features+Head Starter - infer

https://www.kaggle.com/code/nartaa/features-head-starter/notebook

### Configuration

In [1]:
# Dynamic module import: imported modules are re-imported automatically
# whenever their source files are updated.
# NOTE(review): the traceback recorded under the DataLoader cell shows
# autoreload failing to reload numpy after pip swapped the installed numpy
# version mid-session — restart the kernel after (re)installing packages.
%load_ext autoreload
%autoreload 2

In [2]:
! pip install numpy=="1.24.0"



In [3]:
import os, gc

import librosa
import numpy as np
import polars as pl
import pandas as pd

import tensorflow as tf
import matplotlib.pyplot as plt

# Record the TensorFlow version with the notebook output for reproducibility.
print(f"TensorFlow version = {tf.__version__}")

2024-03-25 22:54:35.641994: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-25 22:54:35.663477: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow version = 2.12.0


In [4]:
# Choose a tf.distribute strategy that matches the visible hardware:
#   * several GPUs -> MirroredStrategy (replicates across all of them)
#   * exactly one  -> OneDeviceStrategy pinned to that GPU
#   * none         -> OneDeviceStrategy on the CPU, so the notebook still runs
#                     instead of being pinned to a non-existent "/gpu:0"
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy()
    print(f'Using {len(gpus)} GPUs')
elif len(gpus) == 1:
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    print(f'Using {len(gpus)} GPU')
else:
    strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
    print('No GPU found, using CPU')

Using 1 GPU


2024-03-25 22:54:36.802368: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-25 22:54:36.821252: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-25 22:54:36.821336: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [5]:
from src.cfg.v2 import CFG

In [6]:
# Automatic mixed precision setup: when CFG.MIX is truthy, switch on
# TensorFlow's "auto_mixed_precision" optimizer option.
# https://cocoinit23.com/tensowflow-automatic-mixed-precision/
if not CFG.MIX:
    print("Using full precision")
else:
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
    print("Mixed precision enabled")

Mixed precision enabled


In [7]:
# Names of the six vote columns.
TARGETS = [
    "seizure_vote",
    "lpd_vote",
    "gpd_vote",
    "lrda_vote",
    "grda_vote",
    "other_vote",
]

# Non-target metadata column names.
META = [
    "spectrogram_id",
    "spectrogram_label_offset_seconds",
    "patient_id",
    "expert_consensus",
]

# Four chains of five EEG electrodes each. spectrogram_from_eeg() differences
# neighbouring electrodes within a chain and turns every chain into one
# channel of the output image.
FEATS = [
    ['Fp1', 'F7', 'T3', 'T5', 'O1'],
    ['Fp1', 'F3', 'C3', 'P3', 'O1'],
    ['Fp2', 'F8', 'T4', 'T6', 'O2'],
    ['Fp2', 'F4', 'C4', 'P4', 'O2'],
]

# Flat list of eight electrode names.
FEATS2 = [
    'Fp1', 'T3', 'C3', 'O1',
    'Fp2', 'C4', 'T4', 'O2',
]

### Load test csv

In [8]:
# Read the test metadata table that sits directly under CFG.BASE_PATH.
test_csv_path = os.path.join(CFG.BASE_PATH, "test.csv")
test_df = pd.read_csv(test_csv_path)

test_df.head()

Unnamed: 0,spectrogram_id,eeg_id,patient_id
0,853520,3911565283,6885


### Load test spectrograms and eegs

In [9]:
%%time
files = os.listdir(os.path.join(CFG.BASE_PATH, "test_spectrograms"))
print(f"There are {len(files)} spectrogram parquets")

specs = {}
for i, v in enumerate(files):
  tmp_df = pd.read_parquet(os.path.join(CFG.BASE_PATH, "test_spectrograms", v))
  name = int(v.split('.')[0])

  specs[name] = tmp_df.iloc[:, 1:].values

print(specs)

There are 1 spectrogram parquets
{853520: array([[14.91, 17.11, 11.66, ...,  0.05,  0.04,  0.05],
       [11.13, 10.95, 10.77, ...,  0.03,  0.03,  0.02],
       [10.88, 10.57,  8.79, ...,  0.05,  0.06,  0.06],
       ...,
       [ 9.61, 13.32,  9.19, ...,  0.39,  0.56,  0.29],
       [ 8.43, 11.84, 13.64, ...,  0.45,  0.45,  0.34],
       [12.33, 11.84,  9.42, ...,  0.46,  0.54,  0.29]], dtype=float32)}
CPU times: user 43 ms, sys: 16.2 ms, total: 59.2 ms
Wall time: 42.6 ms


In [10]:
def spectrogram_from_eeg(parquet_path):
    """Build a 4-channel log-mel spectrogram image from one EEG parquet file.

    The central 10,000 rows of the recording are used (50 s at the 200 Hz
    sampling rate passed to librosa). For each of the four electrode chains
    in ``FEATS`` the four neighbouring-electrode differences are converted to
    log-mel spectrograms, and their average becomes one channel of the image.

    Args:
        parquet_path: Path to an EEG parquet file that contains a column for
            every electrode named in ``FEATS``.

    Returns:
        ``np.ndarray`` of dtype float32 and shape ``(128, 256, 4)``.
    """
    window = 10_000

    # Load the middle `window` rows of the EEG series. The start is clamped
    # at 0 so that a recording shorter than the window is used from its
    # beginning instead of a negative start silently selecting a tail slice.
    eeg_df = pl.read_parquet(parquet_path).to_pandas()
    middle = max((len(eeg_df) - window) // 2, 0)
    eeg_df = eeg_df.iloc[middle:middle + window]

    # Every difference signal has len(eeg_df) samples, so the hop length is
    # the same for all of them; it is chosen to give roughly 256 time frames.
    hop_length = len(eeg_df) // 256

    # Variable to hold the spectrogram image (mel bins, time frames, chains).
    img = np.zeros((128, 256, 4), dtype="float32")

    for k, cols in enumerate(FEATS):
        for left, right in zip(cols[:-1], cols[1:]):
            # Difference between two neighbouring electrodes of the chain.
            x = eeg_df[left].values - eeg_df[right].values

            # Fill NaNs with the mean of the valid samples. The all-NaN case
            # is checked first so np.nanmean never runs on an all-NaN array
            # (which would only emit a RuntimeWarning and return NaN).
            nan_mask = np.isnan(x)
            if nan_mask.all():
                x[:] = 0
            else:
                x = np.nan_to_num(x, nan=np.nanmean(x))

            # Raw mel spectrogram restricted to the 0-20 Hz band.
            mel_spec = librosa.feature.melspectrogram(
                y=x,
                sr=200,
                hop_length=hop_length,
                n_fft=1024,
                n_mels=128,
                fmin=0,
                fmax=20,
                win_length=128,
            )

            # Log transform; keep a frame count that is a multiple of 32.
            width = (mel_spec.shape[1] // 32) * 32
            mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max).astype(np.float32)[:, :width]

            # Rescale: with ref=np.max the dB values are <= 0, and librosa's
            # default top_db floors them at -80, so this maps into [-1, 1].
            mel_spec_db = (mel_spec_db + 40) / 40
            img[:, :, k] += mel_spec_db

        # Average the four montage differences of this chain.
        img[:, :, k] /= 4.0

    return img

In [11]:
# Precompute one spectrogram image per distinct test EEG, keyed by eeg_id.
eeg_dir = os.path.join(CFG.BASE_PATH, "test_eegs")
eegs = {
    eeg_id: spectrogram_from_eeg(os.path.join(eeg_dir, f"{eeg_id}.parquet"))
    for eeg_id in test_df["eeg_id"].unique()
}

### DataLoader

In [12]:
! pip install -U albumentations

Collecting numpy>=1.24.4 (from albumentations)
  Using cached numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.24.0
    Uninstalling numpy-1.24.0:
      Successfully uninstalled numpy-1.24.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.7 which is incompatible.
apache-beam 2.46.0 requires numpy<1.25.0,>=1.14.3, but you have numpy 1.26.4 which is incompatible.
apache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you have pyarrow 11.0.0 which

In [13]:
from src.loaders.dataloader import DataLoader

# Inference loader over the test set. Augmentation is switched off so that
# predictions are deterministic and made on the unmodified spectrograms.
# NOTE(review): DataLoader is defined outside this notebook — confirm that
# `augment` only controls random training-time augmentation and that the
# loader does not shuffle by default.
test_loader = DataLoader(
    df=test_df,
    specs=specs,
    eegs=eegs,
    augment=False,
)

[autoreload of numpy.core.overrides failed: Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "/opt/conda/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 475, in superreload
    module = reload(module)
  File "/opt/conda/lib/python3.10/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 619, in _exec
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/opt/conda/lib/python3.10/site-packages/numpy/core/overrides.py", line 8, in <module>
    from numpy.core._multiarray_umath import (
ImportError: cannot import name '_ArrayFunctionDispatcher' from 'numpy.core._multiarray_umath' (/opt/conda/lib/python3.10/site-packages/numpy/core/_multiarray_umath.cpython

### Infer

In [15]:
from src.models.efficientnet_b0 import EfficientNetB0

# A single network instance is reused; only its weights change per fold.
model = EfficientNetB0.build_model()

# Weight files are read from the working directory unless the config names
# another folder to load them from.
if CFG.LOAD_MODELS_FROM_INFER == "":
    weights_prefix = ""
else:
    weights_prefix = f"{CFG.LOAD_MODELS_FROM_INFER}/"

preds = []
for fold_idx in range(5):
    print(f"Fold {fold_idx+1}")
    model.load_weights(f"{weights_prefix}EfficientNet_v{CFG.VER}_f{fold_idx}.h5")
    preds.append(model.predict(test_loader, verbose=1))

# Ensemble: element-wise mean of the per-fold predictions.
pred = np.mean(preds, axis=0)

print("Test preds shape", pred.shape)


Fold 1


FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = '/kaggle/input/hms-efficientnet-b0/EfficientNet_v2_f0.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
# Assemble the submission: one row per test EEG and one prediction column per
# entry of TARGETS (the list of vote column names defined earlier in this
# notebook), then write it to submission.csv.
sub = pd.DataFrame({"eeg_id": test_df["eeg_id"].values})
sub[TARGETS] = pred
sub.to_csv("submission.csv", index=False)

print("Submission shape", sub.shape)
sub.head()

Submission shape (1, 7)


Unnamed: 0,eeg_id,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,3911565283,0.168616,0.029283,0.000258,0.455246,0.018882,0.327716


In [None]:
# Sanity check: the last six columns of every row should add up to one.
vote_totals = sub.iloc[:, -6:].sum(axis=1)
vote_totals

0    1.0
dtype: float32