## Download data dari Xeno-canto

In [None]:
# Mount Google Drive at /content/drive; the later cells use paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import urllib.request
import json
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Directory on Google Drive where the downloaded data is stored
corepath = "/content/drive/MyDrive/TUBES DL/Persiapan Data/"
# exist_ok=True creates the directory only when it is missing, without a
# separate check-then-create step.
os.makedirs(corepath, exist_ok=True)

def download_file(url, filename, timeout=60):
    """
    Download a file from a URL and save it locally.

    The payload is streamed to ``<filename>.part`` and moved into place only
    after the transfer completes, so a failed download never leaves a
    truncated file at ``filename``.

    Args:
        url: Address of the file. A protocol-relative URL ("//host/path") is
            fetched over HTTPS.
        filename: Destination path; an existing file is overwritten.
        timeout: Seconds to wait on a blocking network operation.

    Returns:
        True if the file was saved, False if the download failed. Failures
        are printed, not raised.
    """
    import shutil  # local import: only this function needs it

    # urllib rejects URLs without a scheme, so give protocol-relative ones one.
    if isinstance(url, str) and url.startswith('//'):
        url = 'https:' + url

    partial = f"{filename}.part"
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response, \
                open(partial, 'wb') as outfile:
            # Copy in chunks instead of holding the whole file in memory.
            shutil.copyfileobj(response, outfile)
        os.replace(partial, filename)
    except Exception as e:
        # Best effort: report and carry on, but remove the incomplete file.
        if os.path.exists(partial):
            os.remove(partial)
        print(f"Failed to download {url}: {e}")
        return False

    print(f"Downloaded: {filename}")
    return True

def save_frog_data(frog_species_list, samples_limit=10):
    """
    Fetch metadata and audio files for a list of frog species from Xeno-canto.

    For every query a sub-directory of ``corepath`` is created and filled with
    ``sample_<n>.mp3`` / ``sample_<n>_metadata.json`` pairs, numbered from 1.
    A recording is counted only when its audio file is present on disk after
    the download attempt.

    Args:
        frog_species_list: Iterable of search strings, one per species.
        samples_limit: Maximum number of samples to collect per species.

    Errors while querying or processing a species are printed and end the
    collection for that species; they are not raised.
    """
    from urllib.parse import quote  # local import: only used to build the query URL

    for frog in frog_species_list:
        count = 0
        path = os.path.join(corepath, frog.replace(':', '').replace(' ', '_'))

        if not os.path.exists(path):
            print(f"Creating directory {path} for {frog}...")
            os.makedirs(path, exist_ok=True)

        page = 1
        while count < samples_limit:
            # Percent-encode the whole query, not only its spaces.
            url = f'https://www.xeno-canto.org/api/2/recordings?query={quote(frog)}&page={page}'
            print(f"Fetching data from: {url}")

            try:
                # The context manager closes the HTTP response once it is read.
                with urllib.request.urlopen(url, timeout=60) as response:
                    jsondata = json.loads(response.read().decode('utf-8'))
                recordings = jsondata.get('recordings', [])

                for record in recordings:
                    if count >= samples_limit:
                        break

                    audio_url = record.get('file')  # Direct URL to audio file
                    if not audio_url:
                        # Without an audio URL there is nothing to download.
                        print(f"Skipping recording {record.get('id')}: no audio URL")
                        continue

                    # Download audio file
                    audio_filename = os.path.join(path, f"sample_{count + 1}.mp3")
                    download_file(audio_url, audio_filename)

                    # download_file only prints on failure, so confirm the
                    # audio is on disk before counting the sample. A file left
                    # at this path by an earlier run also passes this check.
                    if not (os.path.isfile(audio_filename)
                            and os.path.getsize(audio_filename) > 0):
                        print(f"Skipping recording {record.get('id')}: audio not downloaded")
                        continue

                    # Save metadata only for samples whose audio was stored
                    metadata_filename = os.path.join(path, f"sample_{count + 1}_metadata.json")
                    with open(metadata_filename, 'w') as outfile:
                        json.dump(record, outfile)
                    print(f"Saved metadata: {metadata_filename}")

                    count += 1

                if not recordings or count >= samples_limit:
                    break

                # Stop after the last page instead of requesting one that
                # does not exist.
                num_pages = jsondata.get('numPages')
                if num_pages is not None and page >= int(num_pages):
                    break

                page += 1

            except Exception as e:
                print(f"Error fetching data for {frog}: {e}")
                break

        print(f"Collected {count} samples for {frog}.")

# Search strings passed to save_frog_data, one per frog species. Each entry is
# sent as the API query and, with spaces replaced by underscores, also names
# the species' folder.
# NOTE(review): the list mixes scientific names and English common names —
# confirm each query returns the intended species.
frog_species_list = [
    "Boana cinerascens",
    "Pepper Treefrog",
    "Pool Frog",
    "South American White-lipped Grassfrog",
    "Dendropsophus minutus",
    "Rana temporaria",
    "Rhinella marina",
    "Leptodactylus fuscus",
    "Scinax ruber"
]

# Download the samples and metadata for every entry in the list.
save_frog_data(frog_species_list)

## Save to CSV

In [None]:
# Install the ffmpeg binary and the ffmpeg-python bindings.
# -y answers apt's confirmation prompt so the cell never waits for input.
!apt-get install -y ffmpeg
!pip install ffmpeg-python

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0


In [None]:
import os
import pandas as pd
import glob
import ffmpeg

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Folder that holds the data to combine
# NOTE(review): this differs from the folder the download cell writes to
# ("/content/drive/MyDrive/TUBES DL/Persiapan Data/") — confirm which one is intended.
corepath = "/content/drive/My Drive/Download_Frog_Data/"
os.chdir(corepath)  # Work from the data folder so the relative paths below resolve against it

# Collect every metadata file (stored as JSON) in the folder and its sub-folders.
# Sorting makes the row order of the combined table reproducible between runs.
extension = 'json'
all_metadata_files = sorted(glob.glob(f'**/*.{extension}', recursive=True))

# Convert one JSON metadata file into a DataFrame
def load_metadata(file):
    """Read a metadata JSON file and return it as a single-row DataFrame.

    The file is parsed as a pandas Series and wrapped in a one-row frame.
    If anything goes wrong, the error is printed and an empty DataFrame is
    returned instead.
    """
    try:
        record = pd.read_json(file, typ='series')
        frame = pd.DataFrame([record])
    except Exception as err:
        print(f"Error loading {file}: {err}")
        frame = pd.DataFrame()
    return frame

# Load every metadata file into its own frame. load_metadata returns an empty
# frame for a file it could not read; those are dropped before combining.
metadata_frames = [
    frame for frame in map(load_metadata, all_metadata_files) if not frame.empty
]
if not metadata_frames:
    # pd.concat fails on an empty list with a generic message; say what is missing.
    raise ValueError(f"No readable metadata JSON files found under {os.getcwd()}")

# Combine all metadata into a single DataFrame
all_metadata = pd.concat(metadata_frames, ignore_index=True)

# Save the combined table as one CSV file
combined_csv_path = "frog_metadata.csv"
all_metadata.to_csv(combined_csv_path, index=False, encoding='utf-8-sig')
print(f"==========Done Combine Metadata to CSV: {combined_csv_path}==========")

In [None]:
import pandas as pd

# Load the combined CSV from the Drive folder where the merge cell saves it
# (that cell's working directory plus "frog_metadata.csv"); no cell writes a
# copy to /content.
data = pd.read_csv("/content/drive/My Drive/Download_Frog_Data/frog_metadata.csv")
data.head()