In [1]:
# Report the GPU attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Mon Sep 20 10:48:06 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.63.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Common Settings

In [2]:
# Mount Google Drive at /content/gdrive; the secrets, helper scripts and
# experiment directories used by the later cells are read from paths below it.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [9]:
import os
import sys

COMPETITION_NAME = 'g2net-gravitational-wave-detection'

KAGGLE_DIR = '/content/gdrive/MyDrive/kaggle'
sys.path.append(KAGGLE_DIR)
from scripts.utils import mkdir, load_json

# prepare github
GIT_CONFIG_PATH = f'{KAGGLE_DIR}/secrets/github.json'
GIT_CONFIG = load_json(GIT_CONFIG_PATH)
GIT_USER_NAME = GIT_CONFIG['user.name']
GIT_USER_EMAIL = GIT_CONFIG['user.email']
GIT_TOKEN = GIT_CONFIG['token']
GIT_REPOSITORY_NAME = 'kaggle-' + COMPETITION_NAME
!git config --global user.name {GIT_USER_NAME}
!git config --global user.email {GIT_USER_EMAIL}

# prepare kaggle API
!pip install -q kaggle
!pip install -q --upgrade --force-reinstall --no-deps kaggle
mkdir('/root/.kaggle')
!cp {KAGGLE_DIR}/secrets/kaggle.json /root/.kaggle/

# prepare wandb
WANDB_JSON_PATH = f"{KAGGLE_DIR}/secrets/wandb.json"
WANDB_CONFIG = load_json(WANDB_JSON_PATH)
WANDB_API_KEY = WANDB_CONFIG['api_key']


## Prepare Dataset

In [4]:
# Input data is kept in two places: the persistent copy on Google Drive and a
# copy on the Colab VM's local disk. Make sure both directories exist.
GDRIVE_INPUT_DIR = f'{KAGGLE_DIR}/competitions/{COMPETITION_NAME}/input'
COLAB_INPUT_DIR = '/content/input'
for input_dir in (GDRIVE_INPUT_DIR, COLAB_INPUT_DIR):
    mkdir(input_dir)


In [5]:
# download dataset into gdrive
# Manual switch: set to True to fetch the competition files with the Kaggle CLI
# into GDRIVE_INPUT_DIR; left False here, so the download is skipped.
is_download = False
if is_download:
    !kaggle competitions download -c {COMPETITION_NAME} -p {GDRIVE_INPUT_DIR}


In [6]:
# copy into colab
# Manual switch: when True, recursively copy everything under GDRIVE_INPUT_DIR
# into COLAB_INPUT_DIR on the VM's local disk.
is_copy = True
if is_copy:
    %cp -r {GDRIVE_INPUT_DIR}/* {COLAB_INPUT_DIR}/


In [7]:
# mount gcs to access kaggle personal dataset
is_use_gcs = True
if is_use_gcs:
    # authentication
    from google.colab import auth
    auth.authenticate_user()

    # install gcsfuse
    !echo "deb http://packages.cloud.google.com/apt gcsfuse-`lsb_release -c -s` main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
    !curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
    !apt-get -y -q update
    !apt-get -y -q install gcsfuse

    # mount
    BUCKET1 = "kds-51f153f632521a32be4425a82f5ef3b9d1330dc5cab7403a059157f5" # train part1
    BUCKET2 = "kds-be6b664556db70d53096072ab31ed1183329271fd0df17be418c1cc5" # train part2
    BUCKET3 = "kds-9e101f96a723d803be9e0b1537cbf82c6a27ded057f1df58b1738853" # test

    MOUNT_DIR1 = os.path.join(COLAB_INPUT_DIR, 'train_fold01')
    MOUNT_DIR2 = os.path.join(COLAB_INPUT_DIR, 'train_fold23')
    MOUNT_DIR3 = os.path.join(COLAB_INPUT_DIR, 'test')
    mkdir(MOUNT_DIR1)
    mkdir(MOUNT_DIR2)
    mkdir(MOUNT_DIR3)

    !gcsfuse --implicit-dirs --limit-bytes-per-sec -1 --limit-ops-per-sec -1 {BUCKET1} {MOUNT_DIR1}
    !gcsfuse --implicit-dirs --limit-bytes-per-sec -1 --limit-ops-per-sec -1 {BUCKET2} {MOUNT_DIR2}
    !gcsfuse --implicit-dirs --limit-bytes-per-sec -1 --limit-ops-per-sec -1 {BUCKET3} {MOUNT_DIR3}

deb http://packages.cloud.google.com/apt gcsfuse-bionic main
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2537  100  2537    0     0  87482      0 --:--:-- --:--:-- --:--:-- 87482
OK
Hit:1 http://archive.ubuntu.com/ubuntu bionic InRelease
Get:2 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Get:3 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease [15.9 kB]
Hit:4 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease
Get:5 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
Get:6 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [74.6 kB]
Hit:7 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease
Hit:8 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease
Get:9 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Ign:10 https://developer.downlo

## Prepare Working Directory

In [5]:
# prepare work dir
# get_work_dir is a project helper; presumably it selects the experiment
# directory below WORKING_DIR (the recorded run ended up in .../working/002)
# — confirm in scripts/utils.
from scripts.utils import get_work_dir
WORKING_DIR = f'{KAGGLE_DIR}/competitions/{COMPETITION_NAME}/working'
WORK_DIR = get_work_dir(WORKING_DIR)
# The experiment number is the last path component of the work dir.
exp_num = os.path.basename(WORK_DIR)
# Later cells use relative paths (requirements.txt, train.py, ./output), so
# switch the notebook's current directory to the experiment directory.
%cd {WORK_DIR}


/content/gdrive/MyDrive/kaggle/competitions/g2net-gravitational-wave-detection/working/002


## Install Packages

In [9]:
# Install the experiment's dependencies; requirements.txt is resolved relative
# to the current directory.
!pip install -r requirements.txt

Collecting wandb
  Downloading wandb-0.12.2-py2.py3-none-any.whl (1.7 MB)
[?25l[K     |▏                               | 10 kB 34.5 MB/s eta 0:00:01[K     |▍                               | 20 kB 41.8 MB/s eta 0:00:01[K     |▋                               | 30 kB 46.1 MB/s eta 0:00:01[K     |▉                               | 40 kB 26.4 MB/s eta 0:00:01[K     |█                               | 51 kB 16.7 MB/s eta 0:00:01[K     |█▏                              | 61 kB 14.2 MB/s eta 0:00:01[K     |█▍                              | 71 kB 13.4 MB/s eta 0:00:01[K     |█▋                              | 81 kB 14.8 MB/s eta 0:00:01[K     |█▊                              | 92 kB 16.4 MB/s eta 0:00:01[K     |██                              | 102 kB 12.7 MB/s eta 0:00:01[K     |██▏                             | 112 kB 12.7 MB/s eta 0:00:01[K     |██▍                             | 122 kB 12.7 MB/s eta 0:00:01[K     |██▋                             | 133 kB 12.7 MB/s eta 

## Training

In [24]:
# Training
# Runs train.py from the current directory with the local input directory, the
# experiment number and the W&B API key as command-line arguments.
# NOTE(review): the key is passed in plain text on the command line; check
# whether train.py can read it from the WANDB_API_KEY environment variable instead.
!python train.py -i {COLAB_INPUT_DIR} -e {exp_num} --wandb_api_key {WANDB_API_KEY}

[34m[1mwandb[0m: Currently logged in as: [33mnaoyakintoki[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Tracking run with wandb version 0.12.2
[34m[1mwandb[0m: Syncing run [33m002_fold0[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/naoyakintoki/kaggle-g2net-gravitational-wave-detection[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/naoyakintoki/kaggle-g2net-gravitational-wave-detection/runs/2ncun2kf[0m
[34m[1mwandb[0m: Run data is saved locally in /content/gdrive/My Drive/kaggle/competitions/g2net-gravitational-wave-detection/working/002/wandb/run-20210920_101202-2ncun2kf
[34m[1mwandb[0m: Run `wandb offline` to turn off syncing.

CQT kernels created, time used = 0.0108 seconds
2021-09-20 10:12:08.591418: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), 

In [14]:
# List every entry (hidden ones included) of the current directory.
!ls -a

config.py   main.ipynb		__pycache__	  src
.gitignore  make_submission.py	README.md	  train.py
LICENSE     output		requirements.txt  wandb


In [13]:
# Delete the git metadata of the current directory.
# NOTE(review): destructive and dependent on the current directory — make sure
# the notebook is inside the intended experiment directory before running.
!rm -r ./.git

In [15]:
# Move up one level; the cells below use paths relative to this parent directory.
%cd ..

/content/gdrive/MyDrive/kaggle/competitions/g2net-gravitational-wave-detection/working


In [16]:
# Clone the competition repository into ./999_, authenticating with the
# personal access token embedded in the URL.
# NOTE(review): git records the clone URL as the `origin` remote, so the token
# presumably ends up in 999_/.git/config — confirm and treat that file as a secret.
!git clone https://{GIT_USER_NAME}:{GIT_TOKEN}@github.com/{GIT_USER_NAME}/{GIT_REPOSITORY_NAME}.git "999_"

Cloning into '999_'...
remote: Enumerating objects: 37, done.[K
remote: Counting objects: 100% (37/37), done.[K
remote: Compressing objects: 100% (30/30), done.[K
remote: Total 37 (delta 9), reused 25 (delta 4), pack-reused 0[K
Unpacking objects: 100% (37/37), done.


In [18]:
!cp -r ./999_/.git ./002/

fatal: not a git repository (or any parent up to mount point /content)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).


In [19]:
# Commit & Push
!git add .
!git commit -m "update"
!git push origin main

fatal: not a git repository (or any parent up to mount point /content)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
fatal: not a git repository (or any parent up to mount point /content)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
fatal: not a git repository (or any parent up to mount point /content)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).


In [10]:
# Debug check: display the GitHub user name loaded from the secrets file.
GIT_USER_NAME

'kn25ha01'

## Testing

In [None]:
import glob

# Collect the best-score checkpoint of every fold from ./output. glob returns
# matches in arbitrary order, so sort them to get a stable fold order across runs.
checkpoint_paths = sorted(glob.glob("./output/*_best_score.pth"))
for checkpoint_path in checkpoint_paths:
    print(checkpoint_path)

# Comma-separated list in the form passed to make_submission.py via `-c`.
cps = ",".join(checkpoint_paths)

./output/tf_efficientnet_b0_ns_fold0_best_score.pth
./output/tf_efficientnet_b0_ns_fold1_best_score.pth
./output/tf_efficientnet_b0_ns_fold2_best_score.pth
./output/tf_efficientnet_b0_ns_fold3_best_score.pth


In [None]:
# Make Submission
# Runs make_submission.py with the local input directory and the
# comma-separated checkpoint list built in the previous cell.
# NOTE(review): if no checkpoint matched, `cps` is empty and `-c` receives no value.
!python make_submission.py -i {COLAB_INPUT_DIR} -c {cps}

CQT kernels created, time used = 0.0112 seconds
2021-09-20 07:04:11.869143: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-20 07:04:11.877666: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-20 07:04:11.878343: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-20 07:04:11.879300: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX512F
To enable them in other operations, rebuild

In [None]:
# Post Submission
# Upload ./output/submission.csv to the competition with the Kaggle CLI, using
# the experiment number as the submission message.
!kaggle competitions submit -c {COMPETITION_NAME} -f ./output/submission.csv -m {exp_num}

100% 4.66M/4.66M [00:05<00:00, 865kB/s]
Successfully submitted to G2Net Gravitational Wave Detection