In [1]:
import csv
import time
from urllib.parse import quote

import requests

# Codes (or full names) to resolve against GRSciColl, in query order.
# For a quick trial run, swap in a short list such as ["CCUB", "CCP", "CIRAD"].
collection_codes = [
    "University of Guelph, Centre for Biodiversity Genomics",
    "MSB", "INRAE", "ZFMK", "UAM", "NHMO", "MVZ", "OGL", "O", "USNM", "MO",
    "NHMUK", "US", "NHMD", "QCAZ", "DMNS", "DSMZ", "B", "UJ", "IVB", "TCWC",
    "CORBIDI", "HNHM", "KU", "MfN", "IAvH", "NZAC", "Cuni", "FR", "CMN", "JBRJ",
    "SMNHTAU", "SMNS", "DBG", "K", "ZMH", "SMF", "UNAL",
    "Universidad Nacional de Colombia (UNAL)",
    "MCZ", "AWI", "SMNG", "SAIAB", "C", "CAS", "TBG", "NYBG", "NY", "HNT", "HBG",
    "BMNH(E)", "JNTBGRI", "LAGU",
    "Fundação Oswaldo Cruz (Fiocruz)",
    "GLM", "ROS", "BioCon", "AM", "BGBM", "NSMT", "LACM", "UWFC", "W", "CNGB",
    "LSUMZ", "UA", "ROM", "BPBM", "KHD", "VIMS", "YPM", "FMNH", "SIO", "UWBM",
    "Zoological Museum of Moscow University",
    "MQU", "BAMZ",
    "Canadian National Collection of Insects, Arachnids and Nematodes",
    "HM",
]

In [3]:
# GRSciColl lookup endpoint; the value to resolve is appended right after
# the trailing "institutionCode=".
# (An earlier draft pointed at https://api.gbif.org/v1/grscicoll/collection instead.)
base_url = (
    "https://api.gbif.org/v1/grscicoll/lookup"
    "?institutionCode="
)

def query_grscicoll_api(code, timeout=30):
    """Look up a single institution code through the GRSciColl lookup endpoint.

    Args:
        code: Institution code (or name) to resolve. It is percent-encoded and
            appended to the module-level ``base_url``.
        timeout: Seconds to wait for the server before giving up; forwarded to
            ``requests.get``.

    Returns:
        A list containing at most one row of the form
        ``[code, institution_code, institution_name, self_link, key, active, matchType]``.
        The list is empty when the request fails, the body is not valid JSON,
        the match type is ``"NONE"``, or the response carries no matched entity.
    """
    results = []

    # Percent-encode the value so characters such as '&', '#', '+', spaces or
    # parentheses cannot alter the structure of the query string.
    url = f"{base_url}{quote(str(code), safe='')}"
    print(f"{base_url}{code}")  # progress log, kept in its human-readable form

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Check if the request was successful
        data = response.json()  # parse the body once and reuse it below
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body on older versions of requests.
        print(f"An error occurred while querying the GRSciColl API: {e}\n")
        return results

    # Every level falls back to an empty dict so that a missing or null
    # "institutionMatch"/"entityMatched" yields "no match" instead of raising.
    match = data.get('institutionMatch') if isinstance(data, dict) else None
    if not isinstance(match, dict):
        match = {}
    entity = match.get('entityMatched')
    if not isinstance(entity, dict):
        entity = {}

    match_type = match.get('matchType', '')
    if match_type == "NONE" or not entity:
        return results

    # NOTE(review): the link field is expected to be camelCase ("selfLink") in the
    # API response; the lowercase spelling is kept as a fallback — confirm against
    # a live response.
    self_link = entity.get('selfLink', entity.get('selflink', ''))

    results.append([
        code,
        entity.get('code', ''),
        entity.get('name', ''),
        self_link,
        entity.get('key', ''),
        entity.get('active', ''),
        match_type,
    ])
    return results

In [4]:
# Column titles of the export, in the same order as the rows produced by the lookup
csv_header = ["Input Code", "Institution Code", "Institution Name", "selflink", "institution_key", "active", "matchType"]

# Stream the lookup results into a UTF-8 encoded CSV file as they arrive
with open('grscicoll_results.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(csv_header)

    # One API request per configured code
    for code in collection_codes:
        query_results = query_grscicoll_api(code)
        for row in query_results:
            csv_writer.writerow(row)
        # Pause for 2 seconds so consecutive requests are spaced out
        time.sleep(2)

print("The GRSciColl query results have been saved to 'grscicoll_results.csv'.")

https://api.gbif.org/v1/grscicoll/lookup?institutionCode=University of Guelph, Centre for Biodiversity Genomics
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=MSB
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=INRAE
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=ZFMK
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=UAM
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=NHMO
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=MVZ
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=OGL
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=O
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=USNM
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=MO
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=NHMUK
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=US
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=NHMD
https://api.gbif.org/v1/grscicoll/lookup?institutionCode=QCAZ
https://api.gbif.org/v1/grsci