In [None]:
import os
import json
import numpy as np
import pandas as pd
import librosa

# First, load list of audio files. We could use 'test.csv' as well,
# but for now, let's stick with parsing the test_soundscape folder.
test_audio_dir = '../input/birdclef-2022/test_soundscapes/'

# Keep only '.ogg' entries, because the prediction loop re-appends '.ogg'
# to every name and anything else would yield a path that does not exist.
# os.path.splitext removes just the final suffix, so a file name that
# contains additional dots keeps its complete stem (split('.')[0] would
# have cut it at the first dot).
file_list = [
    os.path.splitext(fname)[0]
    for fname in sorted(os.listdir(test_audio_dir))
    if fname.endswith('.ogg')
]

# At the moment, there should only be a single soundscape visible.
# During the submission re-run, all other hidden soundscapes
# will be visible too and can be processed by your notebook.
print('Number of test soundscapes:', len(file_list))

In [None]:
# Read the scored birds from the competition's JSON file
scored_birds_path = '../input/birdclef-2022/scored_birds.json'
with open(scored_birds_path) as birds_handle:
    scored_birds = json.load(birds_handle)

In [None]:
# This is where we will store our results
pred = {'row_id': [], 'target': []}

# Settings of the placeholder pipeline, named once instead of being
# scattered as bare numbers through the loops below
SEGMENT_SECONDS = 5      # length of one segment in seconds
SEGMENTS_PER_FILE = 12   # 1min / 5s == 12 segments
SCORE_THRESHOLD = 0.5    # "confidence" threshold for a positive prediction

# Process audio files and make predictions
for afile in file_list:

    # Complete file path (only used once the audio is actually loaded below)
    path = os.path.join(test_audio_dir, afile + '.ogg')

    # Open file with librosa and split signal into 5-second chunks
    # sig, rate = librosa.load(path)
    # ...

    # Let's assume we have a list of 12 empty placeholder chunks
    chunks = [[] for _ in range(SEGMENTS_PER_FILE)]

    # Make prediction for each chunk
    # Each scored bird gets a random value in our case
    # since we don't actually have a model
    for segment_number, _chunk in enumerate(chunks, start=1):

        # Row ids use the END time of the segment: 5, 10, ..., 60
        chunk_end_time = segment_number * SEGMENT_SECONDS

        for bird in scored_birds:

            # This is our random prediction score for this bird
            score = np.random.uniform()

            # Assemble the row_id which we need to do for each scored bird
            row_id = '_'.join([afile, bird, str(chunk_end_time)])

            # Put the result into our prediction dict and apply the
            # "confidence" threshold; bool() stores a plain Python bool
            pred['row_id'].append(row_id)
            pred['target'].append(bool(score > SCORE_THRESHOLD))

Finally, we need to save our results to a CSV file named 'submission.csv'.

Important: Make sure to include 'True' or 'False' for all scored birds for every 5-second segment of every file. If the number of rows in your 'submission.csv' doesn't match the ground truth, the submission will fail.

In [None]:
# Turn the collected predictions into a table with the two submission columns
submission_columns = ['row_id', 'target']
results = pd.DataFrame(data=pred, columns=submission_columns)

# Show the first few rows as a quick sanity check
preview = results.head()
print(preview)

# Write the table to disk without the index column
results.to_csv("submission.csv", index=False)

Example output:
row_id  target
0  soundscape_453028782_akiapo_5    True
1  soundscape_453028782_aniani_5   False
2  soundscape_453028782_apapan_5   False
3  soundscape_453028782_barpet_5   False
4  soundscape_453028782_crehon_5    True

Now, we are ready to submit, and these are the steps we need to take:

Go to notebook settings (on the left, below the "Data" explorer) and disable "Internet".
Click "Save Version" (top right).
Open notebook under the "Code" tab of the competition. It will show up under "Your work".
Now click on the three dots in the upper right corner and select "Submit to Competition" (see screenshot below).
Follow the on-screen instructions.
Wait for the notebook to finish; the results will show up under "My Submissions".