### Imports

In [None]:
import os
import numpy as np
import pandas as pd
import librosa

### Get Data

In [None]:
# Set training and test data paths
train_audio_dir = '../input/birdclef-2024/train_audio/'
test_audio_dir = '../input/birdclef-2024/test_soundscapes/'

# Collect the test soundscape IDs: every .ogg file in the test directory,
# in sorted order, with the extension removed.
# os.path.splitext strips only the final extension, so a file name that
# contains additional dots keeps its full stem (splitting on the first '.'
# would truncate it and the path rebuilt later would not exist).
file_list = [
    os.path.splitext(fname)[0]
    for fname in sorted(os.listdir(test_audio_dir))
    if fname.endswith('.ogg')
]

print('Number of test soundscapes:', len(file_list))

# This is where we will store our results: one list per output column
pred = {'row_id': []}

# Get labels from training data: every entry of the training directory is
# used as a species code and gets its own (initially empty) score column
species_list = sorted(os.listdir(train_audio_dir))
for species_code in species_list:
    pred[species_code] = []

### Processing Loop

In [None]:
# Process audio files and make predictions.
# Each soundscape is scored in fixed 5-second segments; a 4-minute file
# yields 48 of them (4min / 5s == 48 segments).
SEGMENT_SECONDS = 5
SEGMENTS_PER_FILE = 48

for afile in file_list:

    # Complete file path
    path = os.path.join(test_audio_dir, afile + '.ogg')

    # Open file with librosa and split signal into 5-second chunks
    sig, rate = librosa.load(path, sr=32000)
    samples_per_segment = rate * SEGMENT_SECONDS

    # Always produce SEGMENTS_PER_FILE chunks so the number of rows per
    # file stays fixed; if the recording is shorter than expected, the
    # trailing chunks are simply shorter (or empty) slices.
    chunks = [
        sig[start:start + samples_per_segment]
        for start in range(
            0, SEGMENTS_PER_FILE * samples_per_segment, samples_per_segment
        )
    ]

    # Make prediction for each chunk
    # Each bird gets a random value in our case
    # since we don't actually have a model
    # (a real model would consume `chunk` here)
    for i, chunk in enumerate(chunks):
        chunk_end_time = (i + 1) * SEGMENT_SECONDS

        # Assign the row_id which we need to do for each chunk:
        # "<soundscape id>_<end time of the chunk in seconds>"
        row_id = f'{afile}_{chunk_end_time}'
        pred['row_id'].append(row_id)

        for bird in species_list:

            # This is our random prediction score for this bird
            score = np.random.uniform()

            # Put the result into our prediction dict
            pred[bird].append(score)


In [None]:
# Build the submission table from the prediction dict, with 'row_id' as the
# first column followed by the species columns in species_list order
results = pd.DataFrame(data=pred, columns=['row_id', *species_list])

# Show the first rows as a quick sanity check
print(results.head())

# Write the table to disk as CSV, without the DataFrame index
results.to_csv("submission.csv", index=False)