In [13]:
import requests

def search_dryad_datasets(query=None, size=100):
    '''Search for datasets in Dryad using a keyword query.

    Request example:
    https://datadryad.org/api/v2/search?page=1&per_page=100&q=link%20prediction

    Pages are requested one after another until `size` records have been
    collected, a page comes back empty, or a request fails.

    Args:
        - query: str. Keywords for searching datasets.
        - size: int. Maximum number of dataset records to return.
    Return:
        - dict shaped like a single search response, so that
          result["_embedded"]["stash:datasets"] is the list of at most
          `size` records accumulated over every crawled page. All other keys
          are copied from the first page's response. If no page could be
          fetched, the list is empty.
    '''
    api_url = "https://datadryad.org/api/v2/search"
    per_page = 100

    # Ceiling division: number of pages needed to cover `size` records.
    page_count = (size + per_page - 1) // per_page if size and size > 0 else 0

    hits = []
    first_response = None

    for page in range(1, page_count + 1):
        params = {
            "q": query,
            "per_page": per_page,
            "page": page,
        }

        try:
            # Timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(api_url, params=params, timeout=30)
            response.raise_for_status()  # Raise an exception for bad requests
            data = response.json()
            print(f'Crawling page {page}')
        except requests.exceptions.RequestException as e:
            # Report the failure and stop; keep whatever was collected so far
            # instead of re-using the previous page's records.
            print("Error:", e)
            break

        if first_response is None:
            first_response = data

        datasets = data.get("_embedded", {}).get("stash:datasets", [])
        if not datasets:
            # An empty page means the result set is exhausted.
            break
        hits.extend(datasets)

    # Build the result from copies so the parsed first response is not mutated.
    result = dict(first_response) if first_response is not None else {}
    embedded = dict(result.get("_embedded") or {})
    embedded["stash:datasets"] = hits[:size]
    result["_embedded"] = embedded
    return result


In [16]:
# Run a keyword search; `query` and `hits` are kept for the cells that follow.
query='link prediction'
hits = search_dryad_datasets(query=query, size=100)

Crawling page 1


In [19]:
# `hits` is used as a dict here: pull out the list of dataset records,
# falling back to an empty list when either key is absent.
datasets = hits.get("_embedded", {}).get("stash:datasets", [])

In [22]:
# Display the identifier of the first record (IndexError if the list is empty).
datasets[0]['identifier']

'doi:10.5061/dryad.bk3j9kd6w'