In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import polars as pl
import os
import glob
import shutil
import zipfile
import librosa
from IPython.display import Audio
import pickle
from joblib import dump, load
from pathlib import Path
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

In [2]:
config = {}
Test = True

if Test:
    config['root_file_path'] = '/kaggle/input/birdclef-2024/unlabeled_soundscapes/'
    config['start'] = 7000
    config['end'] = 8200
    filenames_with_path = glob.glob(f"{config['root_file_path']}*.ogg")
    filenames = [os.path.basename(filename) for filename in filenames_with_path][config['start']:config['end']]
    print(len(filenames))
else:
    config['root_file_path'] = '/kaggle/input/birdclef-2024/test_soundscapes/'
    filenames_with_path = glob.glob(f"{config['root_file_path']}*.ogg")
    filenames = [os.path.basename(filename) for filename in filenames_with_path]
    print(len(filenames))

1200


In [3]:
sr = 32000
fmin = 20
fmax = 15000
slice_duration = 5
n_mels = 128
n_fft = n_mels*8
size_x = 512

hop_length = int(sr*slice_duration / size_x)
print(hop_length)

312


In [None]:
from tqdm import tqdm

chunk_feature_dict = {}

for file in tqdm(filenames):
    full_path = config['root_file_path']+file
    audio, sample_rate = librosa.load(path=full_path, sr=sr)
    cur_file = file.replace('.ogg','')
    samples_per_segment = sample_rate * 5
    if len(audio) > 7680000:
        total_samples = 7680000
    else:
        total_samples = len(audio)
        
    for i in range(0, total_samples+160000, samples_per_segment):
        if i + samples_per_segment <= total_samples:
            segment = audio[i:i + samples_per_segment]
            chunk = cur_file+'_'+str(int((i/32000)+5))
            mfccs = librosa.feature.mfcc(y=segment, sr=32000, n_mfcc=40)
            # flattened_features = (np.mean(mfccs.T, axis=0)).reshape(1, -1)
            # melspec = librosa.feature.melspectrogram(y=segment, sr=sr, n_fft=n_fft, n_mels=n_mels, hop_length=hop_length, fmin=fmin, fmax=fmax)
            # mfccs = librosa.feature.mfcc(y=segment, sr=sr, n_fft=n_fft, n_mels=n_mels, hop_length=hop_length, fmin=fmin, fmax=fmax)
            # flattened_melspec_features = (np.mean(melspec.T, axis=0))
            # flattened_mfcc_features = (np.mean(mfccs.T, axis=0))
            # flattened_features = np.append(flattened_melspec_features, flattened_mfcc_features)
            # chunk_feature_dict[chunk] = np.ascontiguousarray(flattened_features)
            flattened_features = (np.mean(mfccs.T, axis=0))
            chunk_feature_dict[chunk] = flattened_features

 96%|█████████▋| 1155/1200 [30:26<01:09,  1.54s/it]

In [6]:
meta_data = pl.read_csv('../input/birdclef-2024/train_metadata.csv', low_memory=True)
bird_cols = list(meta_data['primary_label'].unique().sort())

submit = pd.read_csv("/kaggle/input/birdclef-2024/sample_submission.csv")
submit['row_id'] = 'samples'

submit

Unnamed: 0,row_id,asbfly,ashdro1,ashpri1,ashwoo2,asikoe2,asiope1,aspfly1,aspswi1,barfly1,...,whbwoo2,whcbar1,whiter2,whrmun,whtkin2,woosan,wynlau1,yebbab1,yebbul3,zitcis1
0,samples,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,...,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495
1,samples,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,...,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495
2,samples,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,...,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495,0.005495


In [7]:
rbf_kernel_svm_model = load('/kaggle/input/mix-n-match-kmeans-model/rbf_kernel_svm_model_mfcc_c5_mixmatch.joblib')

In [8]:
for file_name, feature_set in tqdm(chunk_feature_dict.items()):
    feature_set_reshaped = feature_set.reshape(1, -1)
    predictions = np.ascontiguousarray(rbf_kernel_svm_model.predict_proba(feature_set_reshaped))
    # predictions = rbf_kernel_svm_model.predict_proba(feature_set)
    df = pd.DataFrame(predictions, columns=bird_cols)
    df.insert(loc=0, column='row_id', value=file_name)
    submit = pd.concat([submit,df]).reset_index(drop=True)
    
i = submit[(submit.row_id == 'samples')].index
submit = submit.drop(i).reset_index(drop=True)

  0%|          | 0/57251 [00:00<?, ?it/s]


ValueError: Shape of passed values is (1, 130), indices imply (1, 182)

In [8]:
"""for col in bird_cols:
    if col in poor_scorers:
        submit[col].values[:] = 0.00000"""

'for col in bird_cols:\n    if col in poor_scorers:\n        submit[col].values[:] = 0.00000'

In [9]:
sample_submission = pd.read_csv("/kaggle/input/birdclef-2024/sample_submission.csv")
assert set(sample_submission.columns) == set(submit.columns)

In [10]:
submit.to_csv('submission.csv', index=False)