In [3]:
import json
import requests

### Obtain Dryad token

In [5]:

# Load the OAuth client credentials (application id + secret) from the local secrets file.
with open('../../secrets/dryad_client_credential.json') as f:
    dryad_token = json.load(f)


url = "https://datadryad.org/oauth/token"
client_id = dryad_token["application_id"]
client_secret = dryad_token["secret"]
grant_type = "client_credentials"

# Client-credentials grant: the form body carries the id/secret pair.
payload = {
    "client_id": client_id,
    "client_secret": client_secret,
    "grant_type": grant_type
}

headers = {
    "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8"
}

# requests has no default timeout; without one this cell can hang indefinitely.
response = requests.post(url, data=payload, headers=headers, timeout=30)

if response.status_code == 200:
    # Parse the body once and reuse it.
    token_response = response.json()
    access_token = token_response.get("access_token")

    if access_token:
        print("Token successfully retrieved:")
        # Show only the non-secret metadata: notebook outputs are saved with the file,
        # so printing the token itself would leak it to anyone who can read the notebook.
        print({key: value for key, value in token_response.items() if key != "access_token"})

        # Save the access token to a JSON file
        token_data = {"access_token": access_token}
        file_path = "../../secrets/dryad_token.json"
        with open(file_path, "w") as json_file:
            json.dump(token_data, json_file)
        print(f"Saved to {file_path}")
    else:
        # Do not overwrite a previously saved token with a null value.
        print("Token response did not contain an access_token; nothing was saved.")

else:
    print("Failed to retrieve token. Status code:", response.status_code)
    print("Response:", response.text)


Token successfully retrieved:
{'access_token': '<redacted>', 'token_type': 'Bearer', 'expires_in': 35999, 'scope': 'all', 'created_at': 1696582066}
Saved to ../../secrets/dryad_token.json


### Search for datasets

In [4]:
import requests

def search_dryad_datasets(query=None, size=100):
    ''' Search for datasets in Dryad using keyword queries.
    Request example: https://datadryad.org/api/v2/search?page=1&per_page=100&q=link%20prediction

    Pages through the search endpoint, 100 records per request, until `size`
    records have been collected, a page comes back empty, or a request fails.

    Args:
        - query: str. Keywords for searching datasets
        - size: int. The maximum number of returned datasets
    Return:
        - dict. The response envelope of the last successfully fetched page, with
          result["_embedded"]["stash:datasets"] replaced by the dataset metadata
          records accumulated over all fetched pages (at most `size` of them).
          The dict shape is kept so callers can keep reading
          result.get("_embedded", {}).get("stash:datasets", []).
          If no page could be fetched, the list is empty.
    '''
    # Load token
    file_path = "../../secrets/dryad_token.json"
    with open(file_path, "r") as json_file:
        access_token = json.load(json_file)["access_token"]

    # Search for datasets
    api_url = "https://datadryad.org/api/v2/search"

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }
    per_page = 100
    limit = max(size, 0)
    # Ceiling division: the number of pages needed to cover `limit` records.
    # (The previous `size % per_page + 1` was a remainder, not a page count.)
    page_count = (limit + per_page - 1) // per_page

    envelope = {}
    hits = []

    for page in range(1, page_count + 1):
        params = {
            "q": query,
            "per_page": per_page,
            "page": page,
        }

        try:
            # requests has no default timeout; set one so a stalled server cannot hang the cell.
            response = requests.get(api_url, params=params, headers=headers, timeout=30)
            response.raise_for_status()  # Raise an exception for bad requests

            # Parse the JSON response
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # Stop paging on failure and keep what was collected so far; continuing
            # would reuse stale (or undefined) page data.
            print("Error:", e)
            break

        envelope = data
        datasets = data.get("_embedded", {}).get("stash:datasets", [])

        print(f'Crawling page {page}')

        # An empty page means the result set is exhausted.
        if len(datasets) == 0:
            break

        hits.extend(datasets)

    # Return every collected record inside the familiar response envelope,
    # copying so the parsed page itself is not mutated.
    result = dict(envelope)
    embedded = dict(result.get("_embedded", {}))
    embedded["stash:datasets"] = hits[:limit]
    result["_embedded"] = embedded
    return result


In [5]:
# Search Dryad for the keyword phrase and keep the function's return value in `hits`.
query = "link prediction"
hits = search_dryad_datasets(size=100, query=query)

Crawling page 1


In [6]:
# The dataset records sit under _embedded -> stash:datasets in the search response;
# fall back to an empty list when either level is missing.
datasets = (
    hits
    .get("_embedded", {})
    .get("stash:datasets", [])
)
# Show the identifier of the first record.
datasets[0]["identifier"]

'doi:10.5061/dryad.bk3j9kd6w'

In [7]:
# NOTE(review): this cell raised NameError when it was recorded. `access_token` is bound at
# module level only by the success branch of the token-retrieval cell (inside
# search_dryad_datasets it is a local), so that cell must have run in the same kernel first.
# Displaying it would also write the secret into the saved output — confirm this cell is
# still needed.
access_token

NameError: name 'access_token' is not defined

In [11]:
# Preview only the first few records: the full result list is long, and each record carries
# author contact details (e.g. e-mail addresses) that should not be dumped into saved output.
datasets[:3]

[{'_links': {'self': {'href': '/api/v2/datasets/doi%3A10.5061%2Fdryad.bk3j9kd6w'},
   'stash:versions': {'href': '/api/v2/datasets/doi%3A10.5061%2Fdryad.bk3j9kd6w/versions'},
   'stash:version': {'href': '/api/v2/versions/73967'},
   'stash:download': {'href': '/api/v2/datasets/doi%3A10.5061%2Fdryad.bk3j9kd6w/download'},
   'curies': [{'name': 'stash',
     'href': 'https://github.com/CDL-Dryad/stash/blob/main/stash_api/link-relations.md#{rel}',
     'templated': 'true'}]},
  'identifier': 'doi:10.5061/dryad.bk3j9kd6w',
  'id': 38607,
  'storageSize': 189824,
  'relatedPublicationISSN': '2054-5703',
  'title': 'Data from: Link prediction in real-world multiplex networks via layer reconstruction method',
  'authors': [{'firstName': 'Amir Mahdi',
    'lastName': 'Abdolhosseini-Qomi',
    'email': 'abdolhosseini@ut.ac.ir',
    'affiliation': 'University of Tehran',
    'affiliationROR': 'https://ror.org/05vf56z40',
    'orcid': '0000-0003-0085-202X'},
   {'firstName': 'Seyed Hosseini',
  