### Imports

In [1]:
import os
import numpy as np
import pandas as pd
import librosa

### Get Data

In [15]:
# Set training and test data paths
train_audio_dir = '../input/birdclef-2024/train_audio/'
test_audio_dir = '../input/birdclef-2024/test_soundscapes/'
unlabeled_audio_dir = '../input/birdclef-2024/unlabeled_soundscapes/'

# How many unlabeled soundscapes to use as a stand-in when there is no test audio
N_FALLBACK_FILES = 50

# Raw listing of the test directory (may contain non-audio entries such as a readme)
file_list_test = sorted(os.listdir(test_audio_dir))

# Only .ogg entries are actual test soundscapes
test_ogg_files = [f for f in file_list_test if f.endswith('.ogg')]

# The test set is "empty" when it holds no audio at all. Counting .ogg files
# (instead of checking len(listing) > 1) gives the right answer whether or not
# a readme is present, and also when exactly one soundscape is provided.
# NOTE: the misspelt name is kept because the processing cell reads it.
TEST_EMPYT = len(test_ogg_files) == 0

# Raw listing of the unlabeled directory
file_list_unlabeled = sorted(os.listdir(unlabeled_audio_dir))

if TEST_EMPYT:
    # Filter by extension first and slice afterwards, so that non-audio entries
    # cannot reduce the number of fallback soundscapes
    unlabeled_ogg_files = [f for f in file_list_unlabeled if f.endswith('.ogg')]
    source_files = unlabeled_ogg_files[:N_FALLBACK_FILES]
else:
    # Real test run: use every test soundscape
    source_files = test_ogg_files

# Remove the .ogg extension. splitext strips only the final extension, so a stem
# that itself contains dots is kept intact and the path can be rebuilt later.
file_list = [os.path.splitext(f)[0] for f in source_files]
print('Number of test soundscapes:', len(file_list))


# Labels are the sorted entries of the training audio directory
species_list = sorted(os.listdir(train_audio_dir))

# Result container: a 'row_id' column followed by one (initially empty)
# score column per label, in the same order as species_list
pred = {'row_id': []}
pred.update((species_code, []) for species_code in species_list)

Number of test soundscapes: 50


In [16]:
# Inspect the raw listing of the test directory (every entry, not only .ogg files)
file_list_test

['readme.txt']

### Processing Loop

In [17]:
# Process audio files and make predictions

# Audio is loaded at 32 kHz and scored in 5-second windows
SAMPLE_RATE = 32000
CHUNK_SECONDS = 5
# 48 audio chunks (4min / 5s == 48 segments)
N_CHUNKS = 48

# Fixed seed so the placeholder scores are identical on every Restart & Run All
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Start from empty columns so that re-running this cell does not append
# duplicate rows to the results of a previous run
pred = {'row_id': []}
for bird in species_list:
    pred[bird] = []

# The source directory does not change between files, so choose it once:
# unlabeled soundscapes stand in for the test set when it is empty
audio_dir = unlabeled_audio_dir if TEST_EMPYT else test_audio_dir

for afile in file_list:

    # Complete file path
    path = os.path.join(audio_dir, afile + '.ogg')

    # Open file with librosa. The signal is not consumed yet because there is
    # no model; this is where it would be split into 5-second chunks.
    sig, rate = librosa.load(path, sr=SAMPLE_RATE)
    # ...

    # Make prediction for each chunk
    # Each bird gets a random value in our case
    # since we don't actually have a model
    for i in range(N_CHUNKS):
        chunk_end_time = (i + 1) * CHUNK_SECONDS

        # Assign the row_id which we need to do for each chunk
        row_id = afile + '_' + str(chunk_end_time)
        pred['row_id'].append(row_id)

        # Draw all scores for this chunk in one call, one value per bird
        scores = rng.uniform(size=len(species_list))

        # Put the results into our prediction dict
        for bird, score in zip(species_list, scores):
            pred[bird].append(score)


In [18]:
# Assemble the prediction table: row_id first, then one column per species
column_order = ['row_id', *species_list]
results = pd.DataFrame(pred, columns=column_order)

# Preview the first rows as a quick sanity check
preview = results.head()
print(preview)

# Write the submission file, leaving out the DataFrame index
results.to_csv("submission.csv", index=False)

          row_id    asbfly   ashdro1   ashpri1   ashwoo2   asikoe2   asiope1  \
0   1000170626_5  0.210276  0.665792  0.398583  0.423653  0.614912  0.998531   
1  1000170626_10  0.583737  0.259609  0.170758  0.983999  0.194038  0.087917   
2  1000170626_15  0.053584  0.089535  0.354976  0.033978  0.451508  0.640238   
3  1000170626_20  0.527951  0.840371  0.948641  0.246158  0.539454  0.236560   
4  1000170626_25  0.642752  0.580146  0.012807  0.478287  0.079953  0.484976   

    aspfly1   aspswi1   barfly1  ...   whbwoo2   whcbar1   whiter2    whrmun  \
0  0.094423  0.135508  0.485010  ...  0.259779  0.839425  0.478043  0.786645   
1  0.590341  0.734156  0.719902  ...  0.721481  0.723360  0.121405  0.507002   
2  0.411611  0.357734  0.655305  ...  0.318564  0.505502  0.865840  0.250909   
3  0.810665  0.660010  0.957868  ...  0.958799  0.545499  0.001235  0.409466   
4  0.258889  0.172661  0.655887  ...  0.450632  0.404628  0.452804  0.618671   

    whtkin2    woosan   wynlau1   yebb