### WGKB Gene Database Query

In [1]:
import requests

In [2]:
# Endpoint queried for the annotation report of the assembly named in `payload`.
url = 'https://www.ncbi.nlm.nih.gov/datasets/api/datasets/v2alpha/genome/annotation_report'

# Request fields; the empty lists/strings leave the corresponding filters unset.
payload = {
    'accession': 'GCF_001411555.2',
    'gene_types': [],
    'locations': [],
    'page_size': 20,
    'page_token': '',
    'search_text': [],
    'sort': []
}
# Browser-like headers copied from the NCBI Datasets gene page for this assembly.
headers = {
    'Origin': 'https://www.ncbi.nlm.nih.gov',
    'Referer': 'https://www.ncbi.nlm.nih.gov/datasets/gene/GCF_001411555.2/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
}

# A POST payload belongs in the request body: `json=` serialises it and sets the
# Content-Type header, whereas `params=` would append it to the URL query string
# (dropping the empty lists) and send an empty body.
# NOTE(review): the previous run returned 403; confirm the server accepts the
# request once the payload is sent as JSON.
# The timeout keeps the cell from hanging indefinitely if the server never answers.
result = requests.post(url, headers=headers, json=payload, timeout=30)

In [3]:
result

<Response [403]>

##### Metadata via common name

In [5]:
# !pip install ncbi-datasets-pylib

Collecting ncbi-datasets-pylib
  Downloading ncbi-datasets-pylib-16.6.1.tar.gz (270 kB)
     ---------------------------------------- 0.0/270.2 kB ? eta -:--:--
     -------------------------------------  266.2/270.2 kB 8.3 MB/s eta 0:00:01
     -------------------------------------  266.2/270.2 kB 8.3 MB/s eta 0:00:01
     -------------------------------------  266.2/270.2 kB 8.3 MB/s eta 0:00:01
     -------------------------------------  266.2/270.2 kB 8.3 MB/s eta 0:00:01
     -------------------------------------- 270.2/270.2 kB 1.4 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Col



In [6]:
from ncbi.datasets import GenomeApi
from ncbi.datasets.openapi import ApiClient

In [8]:
def get_chrom_locations(organism_name: str) -> dict:
    """Collect per-chromosome records for an organism's first listed assembly.

    Queries ``GenomeApi.assembly_descriptors_by_taxon`` with ``organism_name``
    and reads the ``chromosomes`` entries of the first assembly returned.

    Args:
        organism_name: Taxon name passed unchanged to the NCBI Datasets query.

    Returns:
        A dict mapping each chromosome's ``accession_version`` to the list
        ``[gc_count, length, name]``. Records without an accession are stored
        under the key ``None`` (the unplaced ``'Un'`` entry in the walnut
        result, for example), so several such records would overwrite each
        other. The dict is empty when nothing was found.
    """
    client = ApiClient()
    genome_api = GenomeApi(client)

    # Search for genome assemblies by organism name
    genome_summaries = genome_api.assembly_descriptors_by_taxon(organism_name)
    genomic_locations = {}

    if genome_summaries.assemblies:
        # Only the first assembly in the response is inspected.
        for chrom_dict in genome_summaries.assemblies[0]['assembly']['chromosomes']:
            genomic_locations[chrom_dict.accession_version] = [chrom_dict.gc_count,
                                                               chrom_dict.length,
                                                               chrom_dict.name]

    # Checked after the loop so that an assembly with no chromosome records is
    # reported too; as an `elif` of the branch above, that case was unreachable.
    if not genomic_locations:
        print(f"No genomes found for organism: {organism_name}")

    return genomic_locations

In [9]:
# Walnut: look up the chromosome records for the English walnut. The result is
# keyed by sequence accession; each value is [gc_count, length, name].

get_chrom_locations('Juglans regia (English walnut)')

{'NC_049901.1': ['16339038', '45207397', '1'],
 'NC_049902.1': ['13647496', '37821870', '2'],
 'NC_049903.1': ['12603732', '35064427', '3'],
 'NC_049904.1': ['12587123', '34823025', '4'],
 'NC_049905.1': ['8048844', '22562875', '5'],
 'NC_049906.1': ['14227033', '39020271', '6'],
 'NC_049907.1': ['18883011', '52418484', '7'],
 'NC_049908.1': ['10989783', '30564197', '8'],
 'NC_049909.1': ['8781156', '24263475', '9'],
 'NC_049910.1': ['13653813', '37707155', '10'],
 'NC_049911.1': ['13295843', '37114715', '11'],
 'NC_049912.1': ['11347187', '31492331', '12'],
 'NC_049913.1': ['14317098', '39757759', '13'],
 'NC_049914.1': ['10372462', '28841373', '14'],
 'NC_049915.1': ['7285363', '20407330', '15'],
 'NC_049916.1': ['10331800', '28711772', '16'],
 'NC_028617.1': ['57970', '160537', 'Pltd'],
 None: ['10479976', '27007677', 'Un']}

In [10]:
from ncbi.datasets import GeneApi

In [11]:
# Create the gene API wrapper without passing an explicit ApiClient; the
# `gene_api` name is reused by later cells.
gene_api = GeneApi()
# Print the generated class documentation to see which gene queries are available.
help(gene_api)

Help on GeneApi in module ncbi.datasets.openapi.api.gene_api object:

class GeneApi(builtins.object)
 |  GeneApi(api_client=None)
 |  
 |  NOTE: This class is auto generated by OpenAPI Generator
 |  Ref: https://openapi-generator.tech
 |  
 |  Do not edit the class manually.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, api_client=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  download_gene_package(self, gene_ids, **kwargs)
 |      Get a gene dataset by gene ID  # noqa: E501
 |      
 |      Get a gene dataset including gene, transcript and protein fasta sequence, annotation and metadata by gene ID.  # noqa: E501
 |      This method makes a synchronous HTTP request by default. To make an
 |      asynchronous HTTP request, please pass async_req=True
 |      
 |      >>> thread = api.download_gene_package(gene_ids, async_req=True)
 |      >>> result = thread.get()
 |      
 |      Args:
 |          gene_ids ([int]): NCBI gene ids
 |      
 

In [49]:
# Chromosome tables for walnut and human, fetched via get_chrom_locations.
# Neither variable is used again in the cells visible here.
walnut_accessions = get_chrom_locations('Juglans regia (English walnut)')
human_accessions = get_chrom_locations('Homo sapiens')

# NOTE(review): 51240 is the tax_id that gene_tax_name_query reports for
# Juglans regia, but gene_orthologs_by_id presumably expects a gene ID -- the
# records returned describe an opossum gene, not walnut. Confirm which ID was
# intended here.
gene_api.gene_orthologs_by_id(51240)

{'genes': {'genes': [{'gene': {'annotations': [{'assemblies_in_scope': [{'accession': 'GCF_027887165.1',
                                                                         'name': 'mMonDom1.pri'}],
                                                'release_date': '2023-05-17',
                                                'release_name': 'GCF_027887165.1-RS_2023_05'}],
                               'chromosomes': ['4'],
                               'common_name': 'gray short-tailed opossum',
                               'description': 'ORMDL sphingolipid biosynthesis '
                                              'regulator 1',
                               'gene_groups': [{'id': '94101',
                                                'method': 'NCBI Ortholog'}],
                               'gene_id': '100021319',
                               'genomic_ranges': [{'accession_version': 'NC_077230.1',
                                                   'range': [{'begin':

In [19]:
# Alternative lookup, left disabled (its arguments were never filled in):
# gene_api.gene_metadata_by_tax_and_symbol(symbols='', taxon='')

# Resolve the free-text organism name to candidate taxa; each hit carries a
# scientific name and tax_id, and some also carry a common name.
gene_api.gene_tax_name_query('Juglans regia (English walnut)')

{'sci_name_and_ids': [{'common_name': 'English walnut',
                       'sci_name': 'Juglans regia',
                       'tax_id': '51240'},
                      {'sci_name': 'Juglans microcarpa x Juglans regia',
                       'tax_id': '2249226'},
                      {'sci_name': 'Juglans sigillata x Juglans regia',
                       'tax_id': '1441050'},
                      {'sci_name': 'Juglans major x Juglans regia',
                       'tax_id': '1141591'},
                      {'sci_name': 'Roystonea regia', 'tax_id': '145709'},
                      {'sci_name': 'Drosera regia', 'tax_id': '4371'},
                      {'sci_name': 'Odontella regia', 'tax_id': '1335017'},
                      {'common_name': 'English oak',
                       'sci_name': 'Quercus robur',
                       'tax_id': '38942'},
                      {'common_name': 'black walnut',
                       'sci_name': 'Juglans nigra',
                       't

In [38]:
# Same client/API construction that get_chrom_locations performs internally,
# repeated at top level so the raw response can be inspected.
client = ApiClient()
genome_api = GenomeApi(client)

# Search for genome assemblies by organism name
genome_summaries = genome_api.assembly_descriptors_by_taxon('Juglans regia (English walnut)')
# Dump the unprocessed assembly records to see their full structure.
print(genome_summaries.assemblies)

[{'assembly': {'annotation_metadata': {'busco': {'busco_lineage': 'eudicots_odb10',
                                                'busco_ver': '4.0.2',
                                                'complete': 0.99269134,
                                                'duplicated': 0.091573514,
                                                'fragmented': 0.0017196905,
                                                'missing': 0.005588994,
                                                'single_copy': 0.9011178,
                                                'total_count': '2326'},
                                      'file': [{'estimated_size': '11438918',
                                                'type': 'GENOME_GFF'},
                                               {'estimated_size': '261191555',
                                                'type': 'GENOME_GBFF'},
                                               {'estimated_size': '23490972',
                           

In [19]:
# Bind `genome_api` to a GenomeApi created without an explicit ApiClient.
genome_api = GenomeApi()
# Print the generated class documentation to browse the available genome queries.
help(genome_api)

Help on GenomeApi in module ncbi.datasets.openapi.api.genome_api object:

class GenomeApi(builtins.object)
 |  GenomeApi(api_client=None)
 |  
 |  NOTE: This class is auto generated by OpenAPI Generator
 |  Ref: https://openapi-generator.tech
 |  
 |  Do not edit the class manually.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, api_client=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  assembly_descriptors_by_accessions(self, accessions, **kwargs)
 |      Get genome metadata by accession  # noqa: E501
 |      
 |      Get detailed metadata for assembled genomes by accession in a JSON output format.  # noqa: E501
 |      This method makes a synchronous HTTP request by default. To make an
 |      asynchronous HTTP request, please pass async_req=True
 |      
 |      >>> thread = api.assembly_descriptors_by_accessions(accessions, async_req=True)
 |      >>> result = thread.get()
 |      
 |      Args:
 |          accessions ([str]):
 |      
