In [1]:
import numpy as np
import pandas as pd
import math
import imageio.v3 as imageio
import albumentations as A
from collections import Counter
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm_notebook

from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
from torchvision import transforms

import torch
import torchmetrics
import timm
import pickle
import psutil
import time
import os

In [2]:
class Config:
    """Central place for the notebook's tunable settings.

    All values are class attributes, so they can be read either from the
    class itself or from the ``CONFIG`` instance created below.
    """
    # Dataset
    HEIGHT = 128  # presumably the spectrogram height in pixels -- TODO confirm
    WIDTH = 320   # width (in columns) of every crop cut from a spectrogram
    ROOT_FOLDER = '/kaggle/input/birdclef-2024-dataset'
    # Training
    BATCH_SIZE = 16
    VAL_BATCH_SIZE = 16
    N_EPOCHS = 3
    # Model
    BACKBONE = 'efficientvit_b1.r288_in1k'
    # Learning Rate Scheduler
    LR_MAX = 3e-4
    WEIGHT_DECAY = 0.00
    # Others
    SEED = 42
    # Use .get() so the notebook also starts outside Kaggle, where the
    # variable is not set; in that case the run is treated as non-interactive.
    IS_INTERACTIVE = os.environ.get('KAGGLE_KERNEL_RUN_TYPE') == 'Interactive'

CONFIG = Config()

In [3]:
def load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so this must only be pointed at trusted dataset files.
    """
    with open(path, 'rb') as file:
        return pickle.load(file)


# First dataset: encoded spectrogram images (X) and their labels (y),
# with a separate validation split.
X = load_pickle('/kaggle/input/effvitmel/X.pkl')
y = load_pickle('/kaggle/input/effvitmel/y.pkl')
X_val = load_pickle('/kaggle/input/effvitmel/X_val.pkl')
y_val = load_pickle('/kaggle/input/effvitmel/y_val.pkl')

# Second dataset, stored in two parts: encoded spectrogram images and labels.
X_wav1 = load_pickle('/kaggle/input/effvitwavmel/X1.pkl')
y_wav1 = load_pickle('/kaggle/input/effvitwavmel/y1.pkl')
X_wav2 = load_pickle('/kaggle/input/effvitwavmel/X2.pkl')
y_wav2 = load_pickle('/kaggle/input/effvitwavmel/y2.pkl')

# Competition metadata table.
meta_data = pd.read_csv('/kaggle/input/birdclef-2024/train_metadata.csv')

In [4]:
def extract_center_samples(spectrogram, sample_width, k):
    """Cut ``k`` adjacent windows of equal width from the middle of a spectrogram.

    The ``k`` windows sit side by side and, taken together, are centred along
    axis 1 (when the leftover width is odd, the extra column stays on the
    right).

    Args:
        spectrogram: array with at least two dimensions; windows are taken
            along axis 1 and every other axis is kept whole.
        sample_width: number of columns in each window.
        k: number of windows to return.

    Returns:
        A list of ``k`` slices of ``spectrogram`` (NumPy basic slicing, so
        these are views rather than copies), ordered left to right.

    Raises:
        ValueError: if the spectrogram has fewer than ``sample_width * k``
            columns. Without this check the start offset would be negative
            and the slices would silently come out with the wrong width.
    """
    spectrogram_width = spectrogram.shape[1]
    required_width = sample_width * k
    if spectrogram_width < required_width:
        raise ValueError(
            f"spectrogram width {spectrogram_width} is smaller than the "
            f"{required_width} columns needed for {k} samples of width {sample_width}"
        )

    start_index = (spectrogram_width - required_width) // 2
    starts = (start_index + i * sample_width for i in range(k))
    return [spectrogram[:, begin:begin + sample_width] for begin in starts]

In [5]:
# Width thresholds (in spectrogram columns) that decide how many crops a
# recording contributes.
LONG_SPEC_MIN_WIDTH = 3200
MEDIUM_SPEC_MIN_WIDTH = 1600


def tile_to_min_width(spec, min_width):
    """Repeat ``spec`` along axis 1 until it has at least ``min_width`` columns.

    Each step concatenates the array with itself, so the width doubles.

    Returns:
        ``(spec, width)``: the (possibly tiled) array and its final width.

    Raises:
        ValueError: if ``spec`` has no columns, since doubling would never
            reach ``min_width``.
    """
    _, width = spec.shape
    if width == 0:
        raise ValueError("cannot tile a spectrogram with zero columns")
    while width < min_width:
        spec = np.concatenate((spec, spec), axis=1)
        width *= 2
    return spec, width


def collect_samples(encoded_specs, labels, n_long, n_medium):
    """Decode every spectrogram and cut fixed-width crops from its centre.

    Images and labels are paired by iteration order of the two mappings.
    NOTE(review): this assumes both mappings list their entries in the same
    order -- confirm against the code that produced the pickles.

    Args:
        encoded_specs: mapping whose values are passed to ``imageio.imread``.
        labels: mapping whose values are the labels for those images.
        n_long: crops taken when the width is >= ``LONG_SPEC_MIN_WIDTH``.
        n_medium: crops taken when the width is >= ``MEDIUM_SPEC_MIN_WIDTH``
            but below ``LONG_SPEC_MIN_WIDTH``.

    Returns:
        ``(samples, widths)``: a list of ``(crop, label)`` tuples, and one
        width per input image (the width after tiling for short images).
    """
    samples = []
    widths = []
    pairs = zip(encoded_specs.values(), labels.values())
    for encoded, label in tqdm_notebook(pairs, total=len(encoded_specs)):
        spec = imageio.imread(encoded)
        _, width = spec.shape
        if width >= LONG_SPEC_MIN_WIDTH:
            n_crops = n_long
        elif width >= MEDIUM_SPEC_MIN_WIDTH:
            n_crops = n_medium
        else:
            # Short recordings are tiled up to one crop width; the threshold
            # follows CONFIG.WIDTH so it cannot drift from the crop size.
            spec, width = tile_to_min_width(spec, CONFIG.WIDTH)
            n_crops = 1
        crops = extract_center_samples(spec, CONFIG.WIDTH, n_crops)
        samples.extend((crop, label) for crop in crops)
        widths.append(width)
    return samples, widths


# First dataset: 5 crops from long recordings, 2 from medium ones, 1 otherwise.
li, sz = collect_samples(X, y, n_long=5, n_medium=2)

# Second dataset (both parts): always a single centre crop per recording.
li_2, sz_2 = collect_samples(X_wav1, y_wav1, n_long=1, n_medium=1)
li_2_part2, sz_2_part2 = collect_samples(X_wav2, y_wav2, n_long=1, n_medium=1)
li_2.extend(li_2_part2)
sz_2.extend(sz_2_part2)

  0%|          | 0/20053 [00:00<?, ?it/s]

  0%|          | 0/16338 [00:00<?, ?it/s]

  0%|          | 0/7930 [00:00<?, ?it/s]

In [6]:
# Build the validation set: one centre crop of CONFIG.WIDTH columns per image.
# Images and labels are paired by the iteration order of the two mappings.
li_val = []
sz_val = []
val_pairs = zip(X_val.values(), y_val.values())
for encoded, label in tqdm_notebook(val_pairs, total=len(X_val)):
    spec = imageio.imread(encoded)
    _, width = spec.shape
    if width == 0:
        # Doubling a zero-width image would never terminate.
        raise ValueError("cannot tile a spectrogram with zero columns")
    # Tile short spectrograms until one full crop fits; the threshold follows
    # CONFIG.WIDTH so it cannot drift from the crop size used below.
    while width < CONFIG.WIDTH:
        spec = np.concatenate((spec, spec), axis=1)
        width *= 2
    for crop in extract_center_samples(spec, CONFIG.WIDTH, 1):
        li_val.append((crop, label))
    # Recorded width is the width after tiling.
    sz_val.append(width)

  0%|          | 0/2514 [00:00<?, ?it/s]

In [7]:
# Report how many (crop, label) pairs each collection holds, one count per line.
for sample_list in (li, li_2, li_val):
    print(len(sample_list))

41964
24268
2514


In [8]:

# Persist the three sample lists to the working directory, in this order:
# first training set, validation set, second training set.
for out_name, sample_list in (('tr.pkl', li), ('val.pkl', li_val), ('tr2.pkl', li_2)):
    with open(out_name, 'wb') as f:
        pickle.dump(sample_list, f)