In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import typing as tp
import yaml
import random
import os
import sys
import soundfile as sf
import librosa
import cv2
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import resnest.torch as resnest_torch

from torchvision import models

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from radam import RAdam
from scipy.ndimage import maximum_filter1d
from tqdm import tqdm
import IPython

from scipy.io.wavfile import write
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

In [2]:
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
#     torch.backends.cudnn.deterministic = True  # type: ignore
#     torch.backends.cudnn.benchmark = True  # type: ignore
    

In [3]:
# ROOT = Path.cwd().parent
# INPUT_ROOT = ROOT / "input"
INPUT_ROOT = Path("/home/knikaido/work/Cornell-Birdcall-Identification/data")
RAW_DATA = INPUT_ROOT / "birdsong-recognition"
TRAIN_AUDIO_DIR = RAW_DATA / "train_audio"
TRAIN_RESAMPLED_AUDIO_DIRS = [
  INPUT_ROOT / "birdsong-resampled-train-audio-{:0>2}".format(i)  for i in range(5)
]
TEST_AUDIO_DIR = RAW_DATA / "test_audio"
# TRAIN_RESAMPLED_AUDIO_DIRS = [
#   INPUT_ROOT / "birdsong-resampled-train-audio-hpss-npz"
# ]


In [4]:
tmp_list = []
for audio_d in TRAIN_RESAMPLED_AUDIO_DIRS:
    if not audio_d.exists():
        continue
    for ebird_d in audio_d.iterdir():
        if ebird_d.is_file():
            continue
        for wav_f in ebird_d.iterdir():
            tmp_list.append([ebird_d.name, wav_f.name, wav_f.as_posix()])
            
train_wav_path_exist = pd.DataFrame(
    tmp_list, columns=["ebird_code", "resampled_filename", "file_path"])

del tmp_list

# train_all = pd.merge(
#     train, train_wav_path_exist, on=["ebird_code", "resampled_filename"], how="inner")

# print(train.shape)
print(train_wav_path_exist.shape)
# print(train_all.shape)

(21375, 3)


In [37]:
def _get_strength(x, file_name):
    strength_list_train = []
    time_dim, base_dim = x.shape[1], x.shape[0]
    x_sum = []

    for j in range(0,time_dim-100):
        x_temp = x[:,j:j+100].sum()
#         x_temp = x[0:84,j:j + base_dim].sum()
        x_sum.append(x_temp)
    x_ave = np.average(x_sum)
    strength_list_train.append([x_sum, x_ave, file_name])
#     print(strength_list_train)
    return strength_list_train

In [38]:
def decide_start_pos(strength_list):

    x_sum = strength_list[0][0]
    x_ave = strength_list[0][1]
#     print(len(x_sum))

    flag = True
    while flag:
        crop = random.randint(0, int(len(x_sum)-313))
        crop_max = np.max(x_sum[crop:crop+313])
        # all 0
                       
        if crop_max > x_ave:
            flag = False
    return crop

In [42]:
save_dir = '/home/knikaido/work/Cornell-Birdcall-Identification/data/birdsong-resampled-train-audio-hpss-npz/'
sr=32000
strength_lists = []
for i, file_name in tqdm(enumerate(train_wav_path_exist['file_path'])):
#     try:
    y, sr = sf.read(file_name)

    melspec = librosa.feature.melspectrogram(y, sr=sr, fmin=20, fmax=16000)
    melspec = librosa.power_to_db(melspec).astype(np.float32)
#     plt.figure(figsize=(8, 4))
#     plt.imshow(melspec)
    strength_list = _get_strength(melspec, file_name)
    strength_lists.append(np.array(strength_list[0]))
    print(melspec.shape)
    print(len(strength_list[0][0]))
#     plt.figure(figsize=(8, 4))
#     plt.plot(strength_list[0][0])
#     idx = decide_start_pos(strength_list)
    
#     plt.vlines([idx], np.min(strength_list[0][0]), np.max(strength_list[0][0]), "blue", linestyles='dashed') 
#     plt.vlines([idx+313], np.min(strength_list[0][0]), np.max(strength_list[0][0]), "blue", linestyles='dashed') 
    
#     plt.figure(figsize=(8, 4))
#     plt.vlines([512*idx], -1, 1, "blue", linestyles='dashed') 
#     plt.vlines([512*(idx+313)], -1, 1, "blue", linestyles='dashed') 
#     plt.plot(y)
#     print(idx)
    break

        
        
#         save_path = save_dir + file_name.split('/', 7)[-1]
#         print(save_path[:-4])
#     if(i==5):break
#         my_makedirs(save_path)
#         np.save(save_path[:-4], y.astype('float32'))
#     except:
#         print('err!!!' + file_name)

0it [00:00, ?it/s]

(128, 879)
779





In [33]:
train_wav_path_exist['file_path'][21374]

'/home/knikaido/work/Cornell-Birdcall-Identification/data/birdsong-resampled-train-audio-04/snobun/XC299754.wav'

In [40]:
np.save('./strangh_lists.npy', np.array(strength_lists))