In [3]:
!pip install torch==1.6.0
!pip install opencv-python
!pip install torchvision==0.2.2
!pip install albumentations
!pip install tensorflow
!pip install pytorch-lightning

Defaulting to user installation because normal site-packages is not writeable
Collecting torch==1.6.0
  Downloading torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl (748.8 MB)
[K     |████████████████████████████████| 748.8 MB 16 kB/s 
Installing collected packages: torch
Successfully installed torch-1.6.0
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python
  Downloading opencv_python-4.4.0.46-cp36-cp36m-manylinux2014_x86_64.whl (49.5 MB)
[K     |████████████████████████████████| 49.5 MB 29.4 MB/s 
Installing collected packages: opencv-python
Successfully installed opencv-python-4.4.0.46
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable
Collecting torchvision==0.2.2
  Downloading torchvision-0.2.2-py2.py3-none-any.w

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import typing as tp
import yaml
import random
import os
import sys
import soundfile as sf
import librosa
import cv2
import matplotlib.pyplot as plt
import time
import glob
from tqdm import tqdm

import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# import resnest.torch as resnest_torch

from torchvision import models

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
# from resnet import ResNet, Bottleneck

from albumentations.core.transforms_interface import DualTransform, BasicTransform
import albumentations as albu

from sklearn.model_selection import StratifiedKFold

# Let pandas display up to 500 rows and 500 columns before truncating output.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)

## util

In [2]:
# Notebook-wide configuration: spectrogram/dataset parameters and DataLoader settings.
config_set = dict(
    dataset=dict(
        name="SpectrogramDataset",
        params=dict(
            # Height (in pixels) that each mel image is resized to.
            img_size=224,
            # Forwarded as keyword arguments to librosa's melspectrogram.
            melspectrogram_parameters=dict(
                n_mels=128,
                fmin=50,
                fmax=15000,
            ),
        ),
    ),
    loader=dict(
        train=dict(
            batch_size=6,
            shuffle=True,
            num_workers=2,
            pin_memory=True,
            drop_last=True,
        ),
        valid=dict(
            batch_size=2,
            shuffle=False,
            num_workers=2,
            pin_memory=True,
            drop_last=True,
        ),
    ),
)

SEED = 100        # value passed to set_seed()
PERIOD = 5        # length, in seconds, of each audio window turned into a mel image
SPECIES_NUM = 24  # number of output classes (size of the classifier head / loss weights)
EPOCH = 50        # not referenced in the visible cells; presumably the training epoch count
HOP_LEN = 512     # not referenced in the visible cells; presumably the STFT hop length
SR = 48000        # audio sample rate in Hz - presumably that of the recordings; TODO confirm

In [3]:
# Alias used by later cells; this binds the same dict object as config_set, not a copy.
config = config_set

In [4]:
def set_seed(seed: int = 42):
    """Seed Python's `random`, NumPy and PyTorch (CPU and current CUDA device).

    Also stores the seed, as a string, in the PYTHONHASHSEED environment variable.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    # The individual seeders are independent of each other, so apply them in one pass.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
# Apply the notebook-wide seed.
set_seed(SEED)

In [5]:
# Data locations. The dataset root defaults to the path used when this notebook was
# written, but can be overridden with the RFCX_INPUT_ROOT environment variable so the
# notebook can run on another machine without editing this cell.
INPUT_ROOT = Path(
    os.environ.get("RFCX_INPUT_ROOT", "/home/knikaido/work/Rainforest-Connection/data")
)
RAW_DATA = INPUT_ROOT / "rfcx-species-audio-detection"
TRAIN_AUDIO_DIR = RAW_DATA / "train"
TEST_AUDIO_DIR = RAW_DATA / "test"
# Kept as a str with a trailing slash: later cells build file names via `OUTPUT_DIR + name`.
OUTPUT_DIR = './output/'

In [8]:
# Directories holding the per-fold prediction CSVs that the ensemble cells read.
OUTPUT_DIR1 = "../175/output/"
OUTPUT_DIR2 = "../153/output/"
OUTPUT_DIR3 = "../119/output/max/"

In [9]:
# Collect the prediction CSVs whose name ends in a digit before ".csv".
# Paths are sorted within each directory and concatenated in directory order.
pred_pathes = [
    csv_path
    for pred_dir in (OUTPUT_DIR1, OUTPUT_DIR2, OUTPUT_DIR3)
    for csv_path in sorted(glob.glob(pred_dir + '*[0-9].csv'))
]
pred_pathes

['../175/output/175_sub0.csv',
 '../175/output/175_sub1.csv',
 '../175/output/175_sub2.csv',
 '../175/output/175_sub3.csv',
 '../175/output/175_sub4.csv',
 '../153/output/153_sub0.csv',
 '../153/output/153_sub1.csv',
 '../153/output/153_sub2.csv',
 '../153/output/153_sub3.csv',
 '../153/output/153_sub4.csv',
 '../119/output/max/119_sub0.csv',
 '../119/output/max/119_sub1.csv',
 '../119/output/max/119_sub2.csv',
 '../119/output/max/119_sub3.csv',
 '../119/output/max/119_sub4.csv']

In [10]:
# Running sum of the prediction columns (every column except the first) of each CSV.
# The 1992 x 24 shape is hard-coded, so every CSV must broadcast against it.
df_pred = np.zeros((1992, 24))
for path in pred_pathes:
    fold_pred = pd.read_csv(path).iloc[:, 1:].values
    df_pred += fold_pred

In [11]:
# Turn the running sum into the mean over all prediction files.
# NOTE(review): this divides in place, so re-running this cell without first
# re-running the cell that builds the sum divides the values a second time.
df_pred /= len(pred_pathes)

In [12]:
# Use the sample submission as a template and overwrite its s0..s23 columns
# with the values held in df_pred.
sample_submission_path = str(RAW_DATA / 'sample_submission.csv')
sub = pd.read_csv(sample_submission_path)
sub.loc[:, 's0':'s23'] = df_pred

In [15]:
# Write the submission frame under OUTPUT_DIR; index=False leaves the row index out of the CSV.
sub.to_csv(OUTPUT_DIR + 'ansamble2_1.csv', index=False)

In [14]:
# Display the submission frame.
sub

Unnamed: 0,recording_id,s0,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23
0,000316da7,0.061580,0.000248,0.011417,0.345217,0.001122,0.061391,0.000552,0.001346,0.000476,0.012038,0.000319,0.000445,0.391449,0.000808,0.000836,0.000576,0.009864,0.008249,0.216808,0.000715,0.002624,0.025824,0.000649,0.036361
1,003bc2cb2,0.000519,0.020895,0.012700,0.134852,0.000585,0.003454,0.000230,0.002920,0.000307,0.000387,0.001151,0.000283,0.004026,0.001507,0.011811,0.000918,0.825374,0.018734,0.002467,0.000272,0.001022,0.008409,0.001910,0.000940
2,0061c037e,0.002860,0.001267,0.035887,0.200441,0.015807,0.066499,0.001504,0.065818,0.000225,0.040388,0.029691,0.003869,0.012310,0.001361,0.000609,0.042250,0.008540,0.067944,0.002746,0.002214,0.010841,0.003818,0.042158,0.014392
3,010eb14d3,0.847194,0.000763,0.000086,0.001577,0.000142,0.000307,0.000145,0.000123,0.252372,0.000151,0.000139,0.000037,0.000475,0.000220,0.000768,0.000119,0.000227,0.000091,0.014514,0.000216,0.000081,0.000224,0.000266,0.000195
4,011318064,0.005400,0.000370,0.001254,0.113496,0.000584,0.006243,0.018752,0.002702,0.000495,0.004008,0.000534,0.003777,0.000887,0.001121,0.607720,0.275315,0.001578,0.002954,0.129963,0.000661,0.001036,0.002552,0.000414,0.000659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1987,ff68f3ac3,0.004497,0.020802,0.012417,0.251741,0.008503,0.236848,0.000482,0.006859,0.001631,0.001122,0.009971,0.002848,0.120477,0.016985,0.000454,0.448866,0.001223,0.002044,0.019451,0.005624,0.007501,0.003478,0.000653,0.465434
1988,ff973e852,0.002052,0.013134,0.001685,0.076396,0.000133,0.003149,0.005233,0.604534,0.001150,0.031840,0.001512,0.017156,0.001060,0.000262,0.008239,0.197491,0.005435,0.144585,0.000833,0.001160,0.020526,0.000186,0.025309,0.003910
1989,ffa5cf6d6,0.000452,0.031248,0.038415,0.474597,0.001276,0.059486,0.000744,0.029252,0.000468,0.187444,0.001228,0.001692,0.030007,0.001036,0.003811,0.487748,0.005606,0.079554,0.001998,0.001133,0.024838,0.002230,0.002162,0.005951
1990,ffa88cbb8,0.013112,0.052232,0.022565,0.585079,0.003268,0.006806,0.000371,0.371543,0.000625,0.038633,0.000435,0.000521,0.071727,0.015233,0.004385,0.010644,0.146763,0.002439,0.003661,0.000343,0.005680,0.005276,0.000876,0.003680


In [6]:
# Load the pickled table `train_gby_mel.pkl` from RAW_DATA and preview its first rows.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files you trust.
train_gby = pd.read_pickle(RAW_DATA / "train_gby_mel.pkl")
train_gby.head()

Unnamed: 0,recording_id,species_id,songtype_id,t_min,f_min,t_max,f_max,name
0,003bec244,[14],[1],[44.544],[2531.25],[45.1307],[5531.25],/home/knikaido/work/Rainforest-Connection/Git/...
1,006ab765f,[23],[1],[39.9615],[7235.16],[46.0452],[11283.4],/home/knikaido/work/Rainforest-Connection/Git/...
2,007f87ba2,[12],[1],[39.135999999999996],[562.5],[42.272],[3281.25],/home/knikaido/work/Rainforest-Connection/Git/...
3,0099c367b,[17],[4],[51.4206],[1464.26],[55.1996],[4565.04],/home/knikaido/work/Rainforest-Connection/Git/...
4,009b760e6,[10],[1],[50.0854],[947.461],[52.5293],[10852.7],/home/knikaido/work/Rainforest-Connection/Git/...


In [7]:
def mono_to_color(
    X: np.ndarray, mean=None, std=None,
    norm_max=None, norm_min=None, eps=1e-6
):
    """Convert a single-channel array into a 3-channel uint8 image.

    The input is replicated three times along a new last axis, standardized
    (subtract `mean`, divide by `std + eps`), clipped to `[norm_min, norm_max]`
    and rescaled linearly to the 0..255 range.

    Args:
        X: Input array; the result has shape `X.shape + (3,)`.
        mean: Value subtracted before scaling. Defaults to the mean of the data.
        std: Divisor used for scaling. Defaults to the standard deviation of the
            mean-subtracted data.
        norm_max: Upper clipping bound in standardized units. Defaults to the
            maximum of the standardized data.
        norm_min: Lower clipping bound in standardized units. Defaults to the
            minimum of the standardized data.
        eps: Added to `std` to avoid division by zero; also the threshold below
            which the standardized data is treated as constant.

    Returns:
        A `np.uint8` array. It is all zeros when the standardized data spans
        no more than `eps`.
    """
    # Stack X as [X, X, X] along a new last axis.
    X = np.stack([X, X, X], axis=-1)

    # Standardize. Defaults are detected with `is None` rather than truthiness so
    # that an explicit 0 / 0.0 passed by the caller is honoured instead of being
    # silently replaced by the data-derived statistic.
    if mean is None:
        mean = X.mean()
    X = X - mean
    if std is None:
        std = X.std()
    Xstd = X / (std + eps)

    _min, _max = Xstd.min(), Xstd.max()
    if norm_max is None:
        norm_max = _max
    if norm_min is None:
        norm_min = _min

    if (_max - _min) > eps:
        # Clip to the requested range, then normalize to [0, 255].
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Constant input: just zero.
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V

In [8]:
def get_criterion():
    """Build the loss: `BCEWithLogitsLoss` with a uniform positive-class weight.

    Returns:
        An `nn.BCEWithLogitsLoss` whose `pos_weight` is a length-SPECIES_NUM
        tensor with every entry equal to SPECIES_NUM.
    """
    class_weights = SPECIES_NUM * torch.ones(SPECIES_NUM)
    return nn.BCEWithLogitsLoss(pos_weight=class_weights)

In [9]:
class LitModule(pl.LightningModule):
    """ResNeSt-50 encoder whose final layer is replaced by an MLP with SPECIES_NUM outputs.

    The loss comes from `get_criterion()` and is applied to the raw encoder outputs.
    """

    def __init__(self, pretrained: bool = True):
        """Build the encoder and the loss.

        Args:
            pretrained: Forwarded to `torch.hub.load`. The default (True) keeps the
                original behavior; pass False when the weights are loaded from a
                checkpoint right afterwards.
        """
        super().__init__()
        self.encoder = torch.hub.load('zhanghang1989/ResNeSt', 'resnest50', pretrained=pretrained)
        # Replace the encoder's `fc` layer with a 2048 -> 1024 -> 1024 -> SPECIES_NUM head.
        self.encoder.fc = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(1024, SPECIES_NUM)
        )

        self.criterion = get_criterion()

    def forward(self, x):
        """Return the encoder output for `x` (no activation is applied here)."""
        x_out = self.encoder(x)
        return x_out

    def configure_optimizers(self):
        """Return an SGD optimizer over this module's own parameters."""
        # Use `self`, not the notebook-global `model`: the global may not exist yet
        # (NameError) or may refer to a different instance than the one being trained.
        optimizer = torch.optim.SGD(self.parameters(), lr=0.01, weight_decay=0.0001, momentum=0.9)
        return optimizer

    def training_step(self, train_batch, batch_idx):
        """Compute and log the training loss for one `(x, y)` batch."""
        x, y = train_batch
        y_pred = self.encoder(x)
        loss = self.criterion(y_pred, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Compute and log the validation loss for one `(x, y)` batch."""
        x, y = val_batch
        y_pred = self.encoder(x)
        loss = self.criterion(y_pred, y)
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_epoch_end(self, validation_step_outputs):
        """Average the per-step validation losses, then print, log and return the mean."""
        mean_loss = torch.stack(list(validation_step_outputs)).mean()
        print('valid_epoch_loss = ', mean_loss)
        self.log('valid_epoch_loss', mean_loss, prog_bar=True, logger=True)
        return mean_loss

In [10]:
def signal_to_mel(y, sr, mel_params):
    """Cut a signal into consecutive PERIOD-second windows and turn each into a mel image.

    Each full window is converted to a dB-scaled mel spectrogram, mapped to a
    3-channel image with `mono_to_color`, resized to a height of
    `mel_params['img_size']` (width scaled by the same factor), moved to
    channel-first layout and scaled to the 0..1 range as float32.

    Args:
        y: Audio samples; windows are taken by slicing along the first axis.
            Assumed to be a mono (1-D) signal - TODO confirm against callers.
        sr: Sample rate of `y` in Hz. Used both for the window length and for
            the spectrogram, so the two always agree.
        mel_params: Dict with keys `'img_size'` and `'melspectrogram_parameters'`
            (keyword arguments forwarded to `librosa.feature.melspectrogram`).

    Returns:
        `np.ndarray` with one channel-first float32 image per full window.
        A trailing partial window is dropped; if `y` is shorter than one window
        the result is an empty array.

    Raises:
        ValueError: If `int(sr * PERIOD)` is 0, since windows cannot be formed.
    """
    # Window length in samples, derived from the actual sample rate of `y`
    # rather than the global SR constant.
    effective_length = int(sr * PERIOD)
    img_size = mel_params['img_size']

    images = []
    # Non-overlapping windows; only starts that leave room for a full window are visited.
    for start in range(0, len(y) - effective_length + 1, effective_length):
        y_ele = y[start:start + effective_length]

        # `y` is passed by keyword: newer librosa releases no longer accept it positionally.
        melspec = librosa.feature.melspectrogram(
            y=y_ele, sr=sr, **mel_params['melspectrogram_parameters']
        )
        melspec = librosa.power_to_db(melspec).astype(np.float32)

        image = mono_to_color(melspec)
        height, width, _ = image.shape
        # cv2.resize takes the target size as (width, height).
        image = cv2.resize(image, (int(width * img_size / height), img_size))
        image = np.moveaxis(image, 2, 0)
        image = (image / 255.0).astype(np.float32)
        images.append(image)

    return np.array(images)

In [11]:
# Sorted list of every .flac file directly inside TEST_AUDIO_DIR, followed by its count.
test_wav_pathes = sorted(glob.glob(str(TEST_AUDIO_DIR / '*.flac')))
len(test_wav_pathes)

1992

In [12]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
# Sorted list of paths under ./output/ whose name starts with "model"; displayed for inspection.
model_pathes = sorted(glob.glob('./output/model*'))
model_pathes

['./output/model0',
 './output/model1',
 './output/model2',
 './output/model3',
 './output/model4']

In [14]:
# For every saved model: predict each test recording and write one submission CSV.
for i, model_path in enumerate(model_pathes):
    model = LitModule()
    # map_location makes the saved tensors load onto `device` regardless of where
    # they were stored when the state dict was written.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval().to(device)
    preds = []

    # Inference only: without no_grad() autograd records a graph for every forward pass.
    with torch.no_grad():
        for path in tqdm(test_wav_pathes):
            y, sr = sf.read(path)
            mel_img = signal_to_mel(y, sr, config["dataset"]["params"])
            mel_img = torch.from_numpy(mel_img).to(device)
            pred = model(mel_img)
            # dim=1 is what the implicit-dim nn.Softmax() picked for this 2-D output;
            # stating it explicitly removes the deprecation warning.
            # NOTE(review): the training loss is BCEWithLogitsLoss (per-class sigmoid),
            # while softmax is applied here - confirm this is intended.
            pred = torch.softmax(pred, dim=1)
            # Average the per-window scores into one score vector per recording.
            pred = torch.mean(pred, 0)
            preds.append(pred.cpu().numpy())

    preds = np.array(preds)
    sub = pd.read_csv(str(RAW_DATA / 'sample_submission.csv'))
    sub.loc[:, 's0':'s23'] = preds
    sub.to_csv(OUTPUT_DIR + '37sub' + str(i) + '.csv', index=False)

Using cache found in /home/user/.cache/torch/hub/zhanghang1989_ResNeSt_master
  if sys.path[0] == '':
100%|██████████| 1992/1992 [17:39<00:00,  1.88it/s]
Using cache found in /home/user/.cache/torch/hub/zhanghang1989_ResNeSt_master
100%|██████████| 1992/1992 [17:49<00:00,  1.86it/s]
Using cache found in /home/user/.cache/torch/hub/zhanghang1989_ResNeSt_master
100%|██████████| 1992/1992 [17:42<00:00,  1.87it/s]
Using cache found in /home/user/.cache/torch/hub/zhanghang1989_ResNeSt_master
100%|██████████| 1992/1992 [17:49<00:00,  1.86it/s]
Using cache found in /home/user/.cache/torch/hub/zhanghang1989_ResNeSt_master
100%|██████████| 1992/1992 [17:39<00:00,  1.88it/s]
