<a href="https://colab.research.google.com/github/uio-mana/BioData-Advanced-SA/blob/main/Colab_introduction_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: download and expand dwc archive into occurrence table from https://api.gbif.org/v1/occurrence/download/request/0017682-241107131044228.zip

import requests
import zipfile
import io
import pandas as pd
import os

def _find_occurrence_member(member_names):
  """Pick the archive member that most likely holds the occurrence table.

  Preference order: a file whose base name is ``occurrence.txt`` (the name
  GBIF uses for the core table of a Darwin Core Archive), then
  ``occurrence.csv``, then the first member ending in ``.csv``.

  Args:
    member_names: The member names of the zip archive, in archive order.

  Returns:
    The chosen member name, or None if no candidate is present.
  """
  # Index by lower-cased base name so the file is still found when the
  # archive nests it inside a folder or uses different letter case.
  by_basename = {}
  for name in member_names:
    by_basename.setdefault(name.rsplit("/", 1)[-1].lower(), name)

  for preferred in ("occurrence.txt", "occurrence.csv"):
    if preferred in by_basename:
      return by_basename[preferred]

  # Fallback for "simple CSV" style downloads that contain a single *.csv.
  return next(
      (name for name in member_names if name.lower().endswith(".csv")),
      None,
  )


def download_and_extract_dwca(download_link):
  """Downloads a Darwin Core Archive (DwCA) and loads its occurrence table.

  The zip is downloaded fully into memory, the occurrence file is located
  inside it (see ``_find_occurrence_member``) and parsed as tab-separated
  text.

  Args:
    download_link: The URL of the DwCA zip file.

  Returns:
    A pandas DataFrame containing the occurrence data, or None if an error
    occurs. Errors are reported with ``print`` rather than raised.
  """
  try:
    # (connect, read) timeout so a stalled server cannot hang the notebook
    # cell forever; the read timeout is per socket read, not for the whole
    # download.
    response = requests.get(download_link, timeout=(10, 120))
    response.raise_for_status()  # Raise an exception for 4xx/5xx responses

    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
      occurrence_file = _find_occurrence_member(z.namelist())
      if occurrence_file is None:
        print("Error: No occurrence data file found in the DwCA archive")
        return None

      with z.open(occurrence_file) as f:
        # NOTE(review): GBIF exports are presumably unquoted tab-delimited
        # text; if columns shift on rows containing quote characters,
        # consider quoting=csv.QUOTE_NONE -- confirm against the data.
        return pd.read_csv(f, sep='\t')

  except requests.exceptions.RequestException as e:
    print(f"Error downloading the file: {e}")
    return None
  except zipfile.BadZipFile:
    print("Error: Invalid zip file")
    return None
  except KeyError:
    print("Error: No occurrence data found in the archive")
    return None
  except Exception as e:
    # Documented contract is "DataFrame or None", so parsing and other
    # unexpected failures are reported and swallowed here as well.
    print(f"An unexpected error occurred: {e}")
    return None


# Example usage: fetch one prepared GBIF occurrence download and load it
# into a DataFrame (occurrence_df is None if the download or parsing failed).
download_link = (
    "https://api.gbif.org/v1/occurrence/download/request/"
    "0017682-241107131044228.zip"
)
occurrence_df = download_and_extract_dwca(download_link)


# if occurrence_df is not None:
#   print(occurrence_df.head()) # print first 5 rows of the DataFrame
#   # You can now work with the DataFrame (e.g., save it to a file)
#   #occurrence_df.to_csv("occurrence_data.csv", index=False)