
**Run the following in Terminal**

Steps:
1. Install required packages
2.  Specify Folder Directory to drop .mp3 files

   a. .../scripts/
   b. mkdir mp3_files
   c. mkdir soundscapes_new_york_wav_a
   d. mkdir soundscapes_new_york_wav_c
   e. mkdir soundscapes_new_york_wav_b

3.  Update input directory   

What this does:

Connects to xeno-canto api
- Gets page count from url query (which defines new york coordinates and quality)
- Loops through each page and downloads the .mp3 files
- Creates a CSV metadata file that maps each file name to its metadata from xeno-canto
- Loops again over downloaded files and converts them to .wav
 

In [None]:
!pip install pydub
!pip install ffmpeg

In [None]:

# ----------------------------------------------------------------
# New York Quality A

import requests
import json
import csv
import os
from urllib.request import urlretrieve
from pydub import AudioSegment
from datetime import datetime

# NOTE(review): this is a plain Python variable that nothing in this cell
# reads, so on its own it does not change which certificates are trusted.
# Kept unchanged -- TODO confirm whether an environment variable was intended.
SSL_CERT_DIR = '/usr/local/etc/openssl@1.1/cert.pem'

# Quality rating requested from xeno-canto in this cell.
quality = 'A'

# Downloads and conversion share one base folder so they cannot drift apart.
# Update `scripts_dir` if the notebook is run from a different location.
scripts_dir = '../scripts'
input_dir = os.path.join(scripts_dir, 'mp3_files')
output_dir = os.path.join(scripts_dir, 'soundscapes_new_york_wav_a')
os.makedirs(input_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# All search tags go into a single space-separated `query` value; `requests`
# takes care of URL-encoding the spaces and quotes. The bounding box is
# lat_min,lon_min,lat_max,lon_max.
url = 'https://xeno-canto.org/api/2/recordings'
search_query = (
    'cnt:"United States" grp:soundscape '
    'box:40.477399,-79.762590,45.015861,-71.777491 '
    f'q:{quality}'
)

# ---- Fetch every result page (the first response tells us how many exist) --
full_response = []
num_pages = 1
page = 1
while page <= num_pages:
    print("page number", page)

    response = requests.get(
        url,
        params={'query': search_query, 'page': page},
        timeout=60,  # do not hang forever on a stalled connection
    )

    print("response.status ---", response.status_code)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()

    payload = response.json()
    num_pages = int(payload['numPages'])
    full_response.append(payload)
    page += 1

# ---- CSV header: union of all keys, in first-seen order --------------------
# Building it from every recording keeps columns aligned even if some
# recordings carry different keys, and copes with an empty result set.
headers = []
for single_page in full_response:
    for recording in single_page['recordings']:
        for key in recording:
            if key not in headers:
                headers.append(key)

if not headers:
    print("No recordings matched the query: {}".format(search_query))

# Only files fetched by THIS cell are converted later; the mp3 folder is
# shared with the other quality cells.
downloaded_files = []

# utf-8 so non-ASCII metadata does not depend on the platform default encoding.
with open('new_york_a_output.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.DictWriter(csvfile, fieldnames=headers, restval='')
    csv_writer.writeheader()

    print(" -------------------------------- Starting downloads at {} --------------------------------".format(datetime.now()))

    for single_page in full_response:
        print(" -------------------------------- Processing page number {} --------------------------------".format(single_page['page']))

        recordings = single_page['recordings']

        for index, recording in enumerate(recordings):
            print("\nDownloading recording number {} with id {} from page {}".format(index, recording['id'], single_page['page']))

            # Metadata is recorded even when the audio itself cannot be fetched.
            csv_writer.writerow(recording)

            file_url = recording.get('file')
            if not file_url:
                print("No download link for recording {}, skipping".format(recording['id']))
                continue
            if file_url.startswith('//'):
                # Protocol-relative link: urlretrieve needs an explicit scheme.
                file_url = 'https:' + file_url

            # Take the real extension (any length) rather than the last 3 chars.
            extension = os.path.splitext(recording.get('file-name') or '')[1] or '.mp3'
            target_path = os.path.join(input_dir, f"{recording['id']}{extension}")

            try:
                urlretrieve(file_url, target_path)
            except (OSError, ValueError) as err:
                # urllib's URLError/HTTPError derive from OSError; one failed
                # file should not abort the remaining downloads.
                print("Failed to download recording {}: {}".format(recording['id'], err))
                continue

            downloaded_files.append(target_path)

print(" -------------------------------- Starting WAV conversion at {} --------------------------------".format(datetime.now()))

for mp3_path in downloaded_files:
    filename = os.path.basename(mp3_path)
    # Only MP3 sources are converted; match the extension case-insensitively.
    if not filename.lower().endswith(".mp3"):
        continue
    print("Converting {} to wav...".format(filename))
    output_filename = os.path.join(output_dir, "{}.wav".format(os.path.splitext(filename)[0]))
    sound = AudioSegment.from_mp3(mp3_path)
    sound.export(output_filename, format="wav")

In [None]:
# ----------------------------------------------------------------
# New York Quality B

import requests
import json
import csv
import os
from urllib.request import urlretrieve
from pydub import AudioSegment
from datetime import datetime

# NOTE(review): this is a plain Python variable that nothing in this cell
# reads, so on its own it does not change which certificates are trusted.
# Kept unchanged -- TODO confirm whether an environment variable was intended.
SSL_CERT_DIR = '/usr/local/etc/openssl@1.1/cert.pem'

# Quality rating requested from xeno-canto in this cell.
quality = 'B'

# Downloads and conversion share one base folder so they cannot drift apart.
# Update `scripts_dir` if the notebook is run from a different location.
scripts_dir = '../scripts'
input_dir = os.path.join(scripts_dir, 'mp3_files')
output_dir = os.path.join(scripts_dir, 'soundscapes_new_york_wav_b')
os.makedirs(input_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# All search tags go into a single space-separated `query` value; `requests`
# takes care of URL-encoding the spaces and quotes. The bounding box is
# lat_min,lon_min,lat_max,lon_max.
url = 'https://xeno-canto.org/api/2/recordings'
search_query = (
    'cnt:"United States" grp:soundscape '
    'box:40.477399,-79.762590,45.015861,-71.777491 '
    f'q:{quality}'
)

# ---- Fetch every result page (the first response tells us how many exist) --
full_response = []
num_pages = 1
page = 1
while page <= num_pages:
    print("page number", page)

    response = requests.get(
        url,
        params={'query': search_query, 'page': page},
        timeout=60,  # do not hang forever on a stalled connection
    )

    print("response.status ---", response.status_code)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()

    payload = response.json()
    num_pages = int(payload['numPages'])
    full_response.append(payload)
    page += 1

# ---- CSV header: union of all keys, in first-seen order --------------------
# Building it from every recording keeps columns aligned even if some
# recordings carry different keys, and copes with an empty result set.
headers = []
for single_page in full_response:
    for recording in single_page['recordings']:
        for key in recording:
            if key not in headers:
                headers.append(key)

if not headers:
    print("No recordings matched the query: {}".format(search_query))

# Only files fetched by THIS cell are converted later; the mp3 folder is
# shared with the other quality cells.
downloaded_files = []

# utf-8 so non-ASCII metadata does not depend on the platform default encoding.
with open('new_york_b_output.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.DictWriter(csvfile, fieldnames=headers, restval='')
    csv_writer.writeheader()

    print(" -------------------------------- Starting downloads at {} --------------------------------".format(datetime.now()))

    for single_page in full_response:
        print(" -------------------------------- Processing page number {} --------------------------------".format(single_page['page']))

        recordings = single_page['recordings']

        for index, recording in enumerate(recordings):
            print("\nDownloading recording number {} with id {} from page {}".format(index, recording['id'], single_page['page']))

            # Metadata is recorded even when the audio itself cannot be fetched.
            csv_writer.writerow(recording)

            file_url = recording.get('file')
            if not file_url:
                print("No download link for recording {}, skipping".format(recording['id']))
                continue
            if file_url.startswith('//'):
                # Protocol-relative link: urlretrieve needs an explicit scheme.
                file_url = 'https:' + file_url

            # Take the real extension (any length) rather than the last 3 chars.
            extension = os.path.splitext(recording.get('file-name') or '')[1] or '.mp3'
            target_path = os.path.join(input_dir, f"{recording['id']}{extension}")

            try:
                urlretrieve(file_url, target_path)
            except (OSError, ValueError) as err:
                # urllib's URLError/HTTPError derive from OSError; one failed
                # file should not abort the remaining downloads.
                print("Failed to download recording {}: {}".format(recording['id'], err))
                continue

            downloaded_files.append(target_path)

print(" -------------------------------- Starting WAV conversion at {} --------------------------------".format(datetime.now()))

for mp3_path in downloaded_files:
    filename = os.path.basename(mp3_path)
    # Only MP3 sources are converted; match the extension case-insensitively.
    if not filename.lower().endswith(".mp3"):
        continue
    print("Converting {} to wav...".format(filename))
    output_filename = os.path.join(output_dir, "{}.wav".format(os.path.splitext(filename)[0]))
    sound = AudioSegment.from_mp3(mp3_path)
    sound.export(output_filename, format="wav")

In [None]:
# ----------------------------------------------------------------
# New York Quality C

import requests
import json
import csv
import os
from urllib.request import urlretrieve
from pydub import AudioSegment
from datetime import datetime

# NOTE(review): this is a plain Python variable that nothing in this cell
# reads, so on its own it does not change which certificates are trusted.
# Kept unchanged -- TODO confirm whether an environment variable was intended.
SSL_CERT_DIR = '/usr/local/etc/openssl@1.1/cert.pem'

# Quality rating requested from xeno-canto in this cell.
quality = 'C'

# Downloads and conversion share one base folder so they cannot drift apart.
# Update `scripts_dir` if the notebook is run from a different location.
scripts_dir = '../scripts'
input_dir = os.path.join(scripts_dir, 'mp3_files')
output_dir = os.path.join(scripts_dir, 'soundscapes_new_york_wav_c')
os.makedirs(input_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# All search tags go into a single space-separated `query` value; `requests`
# takes care of URL-encoding the spaces and quotes. The bounding box is
# lat_min,lon_min,lat_max,lon_max.
url = 'https://xeno-canto.org/api/2/recordings'
search_query = (
    'cnt:"United States" grp:soundscape '
    'box:40.477399,-79.762590,45.015861,-71.777491 '
    f'q:{quality}'
)

# ---- Fetch every result page (the first response tells us how many exist) --
full_response = []
num_pages = 1
page = 1
while page <= num_pages:
    print("page number", page)

    response = requests.get(
        url,
        params={'query': search_query, 'page': page},
        timeout=60,  # do not hang forever on a stalled connection
    )

    print("response.status ---", response.status_code)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()

    payload = response.json()
    num_pages = int(payload['numPages'])
    full_response.append(payload)
    page += 1

# ---- CSV header: union of all keys, in first-seen order --------------------
# Building it from every recording keeps columns aligned even if some
# recordings carry different keys, and copes with an empty result set.
headers = []
for single_page in full_response:
    for recording in single_page['recordings']:
        for key in recording:
            if key not in headers:
                headers.append(key)

if not headers:
    print("No recordings matched the query: {}".format(search_query))

# Only files fetched by THIS cell are converted later; the mp3 folder is
# shared with the other quality cells.
downloaded_files = []

# utf-8 so non-ASCII metadata does not depend on the platform default encoding.
with open('new_york_c_output.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.DictWriter(csvfile, fieldnames=headers, restval='')
    csv_writer.writeheader()

    print(" -------------------------------- Starting downloads at {} --------------------------------".format(datetime.now()))

    for single_page in full_response:
        print(" -------------------------------- Processing page number {} --------------------------------".format(single_page['page']))

        recordings = single_page['recordings']

        for index, recording in enumerate(recordings):
            print("\nDownloading recording number {} with id {} from page {}".format(index, recording['id'], single_page['page']))

            # Metadata is recorded even when the audio itself cannot be fetched.
            csv_writer.writerow(recording)

            file_url = recording.get('file')
            if not file_url:
                print("No download link for recording {}, skipping".format(recording['id']))
                continue
            if file_url.startswith('//'):
                # Protocol-relative link: urlretrieve needs an explicit scheme.
                file_url = 'https:' + file_url

            # Take the real extension (any length) rather than the last 3 chars.
            extension = os.path.splitext(recording.get('file-name') or '')[1] or '.mp3'
            target_path = os.path.join(input_dir, f"{recording['id']}{extension}")

            try:
                urlretrieve(file_url, target_path)
            except (OSError, ValueError) as err:
                # urllib's URLError/HTTPError derive from OSError; one failed
                # file should not abort the remaining downloads.
                print("Failed to download recording {}: {}".format(recording['id'], err))
                continue

            downloaded_files.append(target_path)

print(" -------------------------------- Starting WAV conversion at {} --------------------------------".format(datetime.now()))

for mp3_path in downloaded_files:
    filename = os.path.basename(mp3_path)
    # Only MP3 sources are converted; match the extension case-insensitively.
    if not filename.lower().endswith(".mp3"):
        continue
    print("Converting {} to wav...".format(filename))
    output_filename = os.path.join(output_dir, "{}.wav".format(os.path.splitext(filename)[0]))
    sound = AudioSegment.from_mp3(mp3_path)
    sound.export(output_filename, format="wav")