# Download bird songs
A script to download bird sound files, together with their metadata, from the www.xeno-canto.org archive.

The program downloads all the recordings from the specified country. The recording metadata returned by the query is stored in JSON files.


In [2]:
import urllib.request, json, os

# country whose recordings are requested from xeno-canto
country = "India"

# output folders (relative to the working directory):
# downloaded audio files and the saved JSON result pages
audioPath = "./data/audio"
jsonPath = "./data/json"

## Defining necessary functions
* `downloadJson`: download and save one JSON file for every result page of a query
* `readData`: read one selected field of every recording from the saved JSON files

In [3]:
def downloadJson(country):
    """Download every result page of a xeno-canto query and save each as JSON.

    Queries the xeno-canto API (v2) for bird recordings from ``country`` and
    writes the decoded response of page ``p`` to ``<jsonPath>/page-<p>.json``.
    The number of pages is not known up front: it is read from the
    ``numPages`` field of each response, and the loop runs until the last
    page has been fetched. Progress is reported with ``print``.

    Args:
        country: Country name to search for, e.g. ``'India'``. It is
            percent-encoded before being inserted into the query URL.

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
        KeyError: If a response lacks ``numPages`` or ``recordings``.
    """
    # local import so the block does not depend on a new file-level import
    from urllib.parse import quote

    print("Downloading json data")
    # create the output folder on first use; open() below would fail without it
    os.makedirs(jsonPath, exist_ok=True)
    # encode every reserved character, not only spaces
    encodedCountry = quote(country, safe='')
    numPages = 1
    page = 1
    numRecordings = 0
    # download a json file for every page found in a query
    while page <= numPages:
        url = f"https://xeno-canto.org/api/2/recordings?query=cnt:%22%3D{encodedCountry}%22%20grp:%22birds%22&page={page}"
        print("\nDownloading json for page", str(page) + "...")
        # the context manager closes the HTTP response once it has been read
        with urllib.request.urlopen(url) as jsonPage:
            jsondata = json.loads(jsonPage.read().decode('utf-8'))
        filename = jsonPath + "/page-" + str(page) + ".json"
        with open(filename, 'w') as outfile:
            json.dump(jsondata, outfile)
        print("Json data saved", filename)
        # the response tells how many pages the query has in total
        numPages = jsondata['numPages']
        # count what was actually received instead of assuming a page size
        numRecordings += len(jsondata['recordings'])
        page = page + 1
    print("\nFound", numPages, "pages in total.")
    print("Saved json data for", numRecordings, "files")


def readData(searchTerm):
    """Collect one field from every recording stored in the saved JSON pages.

    Reads ``<jsonPath>/page-1.json`` onwards; the ``numPages`` value found in
    each file decides how many pages are read. For every entry of a page's
    ``recordings`` list, the value stored under ``searchTerm`` is appended to
    the result, in file order.

    Args:
        searchTerm: Key to look up in each recording, e.g. ``'id'``.

    Returns:
        list: The values found, one per recording.
    """
    values = []
    pageNumber = 1
    totalPages = 1
    while pageNumber <= totalPages:
        pagePath = jsonPath + "/page-" + str(pageNumber) + ".json"
        with open(pagePath, 'r') as pageFile:
            pageData = json.load(pageFile)
        # every page carries the total page count of the query
        totalPages = pageData['numPages']
        values.extend(recording[searchTerm] for recording in pageData["recordings"])
        pageNumber += 1
    return values

In [25]:
# Fetch the query results for `country` and store them as JSON pages under jsonPath.
# Requires network access; the page files are rewritten on every run.
downloadJson(country)

Downloading json data

Downloading json for page 1...
Json data saved ./data/json/page-1.json

Downloading json for page 2...
Json data saved ./data/json/page-2.json

Downloading json for page 3...
Json data saved ./data/json/page-3.json

Downloading json for page 4...
Json data saved ./data/json/page-4.json

Downloading json for page 5...
Json data saved ./data/json/page-5.json

Downloading json for page 6...
Json data saved ./data/json/page-6.json

Downloading json for page 7...
Json data saved ./data/json/page-7.json

Downloading json for page 8...
Json data saved ./data/json/page-8.json

Downloading json for page 9...
Json data saved ./data/json/page-9.json

Downloading json for page 10...
Json data saved ./data/json/page-10.json

Downloading json for page 11...
Json data saved ./data/json/page-11.json

Downloading json for page 12...
Json data saved ./data/json/page-12.json

Downloading json for page 13...
Json data saved ./data/json/page-13.json

Downloading json for page 14...
J

In [4]:
# Download the audio file of every recording listed in the saved JSON pages.
# Files are stored as <audioPath>/<BirdName>-<id>.<extension>; files that are
# already present are skipped, so the cell can be re-run to resume a download.

# get recording ids, bird names, download links and original file names from json
ids, birdNames, downloadLinks, fileNames = readData('id'), readData('en'), readData('file'), readData('file-name')
numfiles = len(birdNames)
print("A total of", numfiles, "files will be downloaded")
# make sure the target folder exists; otherwise every single download fails
os.makedirs(audioPath, exist_ok=True)
for i, (recordingId, birdName, downloadLink, fileName) in enumerate(zip(ids, birdNames, downloadLinks, fileNames)):
    # the extension is taken from the original file name reported by the API
    path = f"{audioPath}/{birdName.replace(':', '').replace(' ', '')}-{recordingId}.{fileName.split('.')[-1]}"
    print("\nDownloading file", i + 1, "/", numfiles, '...', path)
    # check if file already exists
    if os.path.isfile(path):
        print("File already exists")
    elif downloadLink:
        # download to a temporary name first, so that an interrupted transfer
        # never leaves a truncated file that a later run would treat as complete
        partPath = path + ".part"
        try:
            urllib.request.urlretrieve(downloadLink, partPath)
            os.replace(partPath, path)
            print("Saved file")
        except Exception as err:
            # best effort: report the cause and carry on with the next file
            print("Error downloading file:", err)
        finally:
            # after a successful rename nothing is left to remove here
            if os.path.exists(partPath):
                os.remove(partPath)
    else:
        print("Download link not found")

A total of 16039 files will be downloaded

Downloading file 1 / 16039 ... ./data/audio/NicobarMegapode-335628.
Download link not found

Downloading file 2 / 16039 ... ./data/audio/NicobarMegapode-298956.
Download link not found

Downloading file 3 / 16039 ... ./data/audio/SnowPartridge-569023.mp3
File already exists

Downloading file 4 / 16039 ... ./data/audio/SnowPartridge-569021.mp3
File already exists

Downloading file 5 / 16039 ... ./data/audio/SnowPartridge-569020.mp3
File already exists

Downloading file 6 / 16039 ... ./data/audio/SnowPartridge-426652.mp3
File already exists

Downloading file 7 / 16039 ... ./data/audio/SnowPartridge-426650.mp3
File already exists

Downloading file 8 / 16039 ... ./data/audio/SnowPartridge-426651.mp3
File already exists

Downloading file 9 / 16039 ... ./data/audio/SnowPartridge-189386.mp3
File already exists

Downloading file 10 / 16039 ... ./data/audio/SnowPartridge-721844.mp3
File already exists

Downloading file 11 / 16039 ... ./data/audio/SnowP