In [6]:
from pprint import pprint

from protein_detective.uniprot import Query, search4af, search4emdb, search4pdb, search4uniprot

# Search on UniProt

In [2]:
import logging

# Log verbosity for this notebook:
#   WARNING - only warnings and more severe messages
#   INFO    - also shows the SPARQL queries
#   DEBUG   - also shows the raw results
# force=True replaces any handlers already attached to the root logger.
# Without it, basicConfig() does nothing once logging has been configured,
# so editing the level and re-running this cell would silently have no effect.
logging.basicConfig(level=logging.WARNING, force=True)

In [3]:
# Search criteria, named so each identifier is readable where it is used.
HUMAN_TAXON_ID = "9606"  # NCBI taxonomy identifier for Homo sapiens
GO_NUCLEUS = "GO:0005634"  # Cellular component - Nucleus
GO_DNA_BINDING = "GO:0003677"  # Molecular function - DNA binding

query = Query(
    taxon_id=HUMAN_TAXON_ID,
    reviewed=True,
    subcellular_location_uniprot="nucleus",
    subcellular_location_go=GO_NUCLEUS,
    molecular_function_go=GO_DNA_BINDING,
)

In [4]:
# Collect the UniProt accessions matching `query` (limit=200) and pretty-print
# them; the printed result below is a set of accession strings.
uniprot_accessions = search4uniprot(query, limit=200)
pprint(uniprot_accessions)



{'A0A087WUV0',
 'A0A0C5B5G6',
 'A0A0U1RQI7',
 'A0A1B0GTS1',
 'A0A1B0GVZ6',
 'A0A1B0GWH4',
 'A0A1W2PPF3',
 'A0A1W2PPK0',
 'A0A1W2PPM1',
 'A0A1W2PQ73',
 'A0A1W2PQL4',
 'A0A1W2PRP0',
 'A0A2R8Y619',
 'A0A2Z4LIS9',
 'A0A3B3IU63',
 'A0A5F9ZHS7',
 'A1A519',
 'A1YPR0',
 'A2RRD8',
 'A2RU54',
 'A3KN83',
 'A6NCS4',
 'A6NDR6',
 'A6NDX5',
 'A6NDZ8',
 'A6NE82',
 'A6NFD8',
 'A6NFI3',
 'A6NFQ7',
 'A6NGD5',
 'A6NHJ4',
 'A6NHT5',
 'A6NI15',
 'A6NJ08',
 'A6NJ46',
 'A6NJG6',
 'A6NJL1',
 'A6NJT0',
 'A6NK53',
 'A6NK75',
 'A6NKF2',
 'A6NLW8',
 'A6NM28',
 'A6NMT0',
 'A6NN14',
 'A6NNA5',
 'A6NNF4',
 'A6NP11',
 'A8K0S8',
 'A8K830',
 'A8K8V0',
 'A8MPP1',
 'A8MQ14',
 'A8MT65',
 'A8MT69',
 'A8MTJ6',
 'A8MTQ0',
 'A8MTY0',
 'A8MUV8',
 'A8MUZ8',
 'A8MWA4',
 'A8MXY4',
 'A8MYZ6',
 'A8MZ59',
 'A9YTQ3',
 'B1APH4',
 'B2RD01',
 'B2RPK0',
 'B2RXF5',
 'B4DU55',
 'B4DX44',
 'B4DXR9',
 'C9JN71',
 'C9JSJ3',
 'E7ETH6',
 'E9PAV3',
 'E9PGG2',
 'O00110',
 'O00255',
 'O00257',
 'O00268',
 'O00287',
 'O00321',
 'O00327',
 'O00358',
 

## Find Protein Data Bank (PDB) entries for UniProt entries


In [5]:
# Look up PDB entries for the accessions found by the UniProt search (limit=200).
# The printed result below maps each accession to a set of PdbResult records
# (id, method, chain, resolution).
pdb_results = search4pdb(uniprot_accessions, limit=200)
pprint(pdb_results)



{'A8MT69': {PdbResult(id='4DRA',
                      method='X-Ray_Crystallography',
                      chain='E/F/G/H=1-81',
                      resolution='2.41'),
            PdbResult(id='4DRB',
                      method='X-Ray_Crystallography',
                      chain='J/K/L/M/N/O=1-81',
                      resolution='2.63'),
            PdbResult(id='4E44',
                      method='X-Ray_Crystallography',
                      chain='B/D=1-81',
                      resolution='2.1'),
            PdbResult(id='4E45',
                      method='X-Ray_Crystallography',
                      chain='B/D/G/I/L/N=1-81',
                      resolution='2'),
            PdbResult(id='4NDY',
                      method='X-Ray_Crystallography',
                      chain='B/D/H/L/M/N/U/V/W/X=8-81',
                      resolution='7'),
            PdbResult(id='4NE1',
                      method='X-Ray_Crystallography',
                      chain='B/D/H/L/M/

## Find AlphaFold models for UniProt entries

In [7]:
# Look up AlphaFold entries for the same accessions (limit=200).
# The printed result below maps each accession to a set of strings; in this
# output every set holds the accession itself.
afresults = search4af(uniprot_accessions, limit=200)
pprint(afresults)

{'A0A087WUV0': {'A0A087WUV0'},
 'A0A0C5B5G6': {'A0A0C5B5G6'},
 'A0A0U1RQI7': {'A0A0U1RQI7'},
 'A0A1B0GTS1': {'A0A1B0GTS1'},
 'A0A1B0GVZ6': {'A0A1B0GVZ6'},
 'A0A1B0GWH4': {'A0A1B0GWH4'},
 'A0A1W2PPF3': {'A0A1W2PPF3'},
 'A0A1W2PPK0': {'A0A1W2PPK0'},
 'A0A1W2PPM1': {'A0A1W2PPM1'},
 'A0A1W2PQ73': {'A0A1W2PQ73'},
 'A0A1W2PQL4': {'A0A1W2PQL4'},
 'A0A1W2PRP0': {'A0A1W2PRP0'},
 'A0A2R8Y619': {'A0A2R8Y619'},
 'A0A2Z4LIS9': {'A0A2Z4LIS9'},
 'A0A3B3IU63': {'A0A3B3IU63'},
 'A0A5F9ZHS7': {'A0A5F9ZHS7'},
 'A1A519': {'A1A519'},
 'A1YPR0': {'A1YPR0'},
 'A2RRD8': {'A2RRD8'},
 'A2RU54': {'A2RU54'},
 'A3KN83': {'A3KN83'},
 'A6NCS4': {'A6NCS4'},
 'A6NDR6': {'A6NDR6'},
 'A6NDX5': {'A6NDX5'},
 'A6NDZ8': {'A6NDZ8'},
 'A6NE82': {'A6NE82'},
 'A6NFD8': {'A6NFD8'},
 'A6NFI3': {'A6NFI3'},
 'A6NFQ7': {'A6NFQ7'},
 'A6NGD5': {'A6NGD5'},
 'A6NHJ4': {'A6NHJ4'},
 'A6NHT5': {'A6NHT5'},
 'A6NI15': {'A6NI15'},
 'A6NJ08': {'A6NJ08'},
 'A6NJ46': {'A6NJ46'},
 'A6NJG6': {'A6NJG6'},
 'A6NJL1': {'A6NJL1'},
 'A6NJT0': {'A6NJT0'}

## Find Electron Microscopy Data Bank (EMDB) entries for UniProt entries

In [8]:
# Look up EMDB entries for the accessions (limit=200); the printed result maps
# each accession to a set of EMDB identifiers.
# The result gets its own name: assigning it back to `uniprot_accessions` would
# replace the accession set with this mapping, so re-running this cell (or any
# search cell above) would pass the wrong data to the search functions.
emdb_results = search4emdb(uniprot_accessions, limit=200)
pprint(emdb_results)

{'A8MT69': {'EMD-14351', 'EMD-33196', 'EMD-33197', 'EMD-14336'},
 'O00255': {'EMD-34195'},
 'O00268': {'EMD-31075',
            'EMD-31107',
            'EMD-31108',
            'EMD-31109',
            'EMD-31110',
            'EMD-31111',
            'EMD-31112',
            'EMD-31113',
            'EMD-31114',
            'EMD-31115',
            'EMD-31116',
            'EMD-31118',
            'EMD-31119',
            'EMD-31204',
            'EMD-31207',
            'EMD-34359',
            'EMD-34360',
            'EMD-37395',
            'EMD-37396',
            'EMD-37398',
            'EMD-37399',
            'EMD-37400',
            'EMD-37401',
            'EMD-37402',
            'EMD-37403',
            'EMD-9298',
            'EMD-9302',
            'EMD-9305',
            'EMD-9306'},
 'O00482': {'EMD-17740'},
 'O14497': {'EMD-0974'},
 'O14744': {'EMD-29677', 'EMD-27078', 'EMD-20764', 'EMD-7137', 'EMD-23609'},
 'O14746': {'EMD-12174',
            'EMD-14196',
         