# Download AlphaFold files

In [1]:
from pathlib import Path
from pprint import pprint

from protein_detective.alphafold import fetch_many, fetch_many_async

In [2]:
# Directory (relative path) that the download calls below are given as their target.
save_dir = Path("alphafold_files")

In [3]:
# Fetch the AlphaFold entries for two UniProt accessions; files go into save_dir.
# No `what` argument is passed here; the directory listing further down shows
# a .pdb model file for each of these two accessions.
summaries = fetch_many(["A1YPR0", "O60481"], save_dir)

Fetching Alphafold summaries: 100%|██████████| 2/2 [00:00<00:00,  8.97it/s]
Downloading AlphaFold files: 100%|██████████| 2/2 [00:00<00:00, 14847.09it/s]


In [4]:
# Pretty-print the returned entries to inspect their summary metadata.
# NOTE(review): this prints every field, including the full sequence string;
# consider showing only selected fields to keep the output short.
pprint(summaries)

[AlphaFoldEntry(uniprot_acc='A1YPR0',
                summary=EntrySummary(entryId='AF-A1YPR0-F1',
                                     gene='ZBTB7C',
                                     sequenceChecksum='73D82A34502B55BF',
                                     sequenceVersionDate='2007-02-06',
                                     uniprotAccession='A1YPR0',
                                     uniprotId='ZBT7C_HUMAN',
                                     uniprotDescription='Zinc finger and BTB '
                                                        'domain-containing '
                                                        'protein 7C',
                                     taxId=9606,
                                     organismScientificName='Homo sapiens',
                                     uniprotStart=1,
                                     uniprotEnd=619,
                                     uniprotSequence='MANDIDELIGIPFPNHSSEVLCSLNEQRHDGLLCDVLLVVQEQEYRTHRSVLAACSKYFKKLFTAGT

Download more files (cif and paeDoc) with the async API

In [None]:
# fetch_many_async is consumed with `async for`. Using it at the top level of a
# cell relies on IPython's autoawait; it is not valid in a plain Python script.
# `what` selects the file kinds to download: here the cif and paeDoc files.
summaries2 = [s async for s in fetch_many_async(["P50613"], save_dir, what={"cif", "paeDoc"})]
pprint(summaries2)

Fetching Alphafold summaries: 100%|██████████| 1/1 [00:00<00:00,  4.87it/s]
Downloading AlphaFold files: 100%|██████████| 2/2 [00:00<00:00, 12520.31it/s]

[AlphaFoldEntry(uniprot_acc='P50613',
                summary=EntrySummary(entryId='AF-P50613-F1',
                                     gene='CDK7',
                                     sequenceChecksum='0A94BFA7DD416CEB',
                                     sequenceVersionDate='1996-10-01',
                                     uniprotAccession='P50613',
                                     uniprotId='CDK7_HUMAN',
                                     uniprotDescription='Cyclin-dependent '
                                                        'kinase 7',
                                     taxId=9606,
                                     organismScientificName='Homo sapiens',
                                     uniprotStart=1,
                                     uniprotEnd=346,
                                     uniprotSequence='MALDVKSRAKRYEKLDFLGEGQFATVYKARDKNTNQIVAIKKIKLGHRSEAKDGINRTALREIKLLQELSHPNIIGLLDAFGHKSNISLVFDFMETDLEVIIKDNSLVLTPSHIKAYMLMTLQGLEYLHQHWILHRDLKPNNLLLDENGVLK




In [None]:
# List the download directory with file sizes; {save_dir} is expanded by IPython
# from the Python variable before the shell command runs.
!ls -sh {save_dir}

total 1.3M
392K AF-A1YPR0-F1-model_v4.pdb
292K AF-O60481-F1-model_v4.pdb
320K AF-P50613-F1-model_v4.cif
280K AF-P50613-F1-predicted_aligned_error_v4.json


## Apply density filter

In [6]:
from protein_detective.alphafold.density import filter_out_low_confidence_residues, find_high_confidence_residues

In [7]:
# Use the PDB file of the first entry fetched above.
pdb_file = summaries[0].pdb_file
# pdb_file can be None; fail early so the cells below never operate on None.
# NOTE(review): the message says "Summary", but the value checked is the PDB
# file -- consider rewording.
if pdb_file is None:
    raise ValueError("Summary has not been downloaded yet.")  # noqa: EM101, TRY003
# Residues reported as high confidence for a threshold of 70
# (presumably a pLDDT cutoff -- TODO confirm in the protein_detective docs).
residues = set(find_high_confidence_residues(pdb_file, 70))
len(residues)

214

In [8]:
# Filtered structures get their own directory and keep the source file name.
out_dir = Path("density_filtered")
# Safe to re-run: an already existing directory is not an error.
out_dir.mkdir(parents=True, exist_ok=True)
out_file = out_dir.joinpath(pdb_file.name)
out_file

PosixPath('density_filtered/AF-A1YPR0-F1-model_v4.pdb')

In [9]:
# Write the filtered structure to out_file, passing the residues selected at
# threshold 70 (presumably only these residues are kept -- confirm in the
# protein_detective docs).
filter_out_low_confidence_residues(pdb_file, residues, out_file)

In [10]:
# Compare the size of the original file with the filtered file.
!ls -sh {pdb_file} {out_file}

392K alphafold_files/AF-A1YPR0-F1-model_v4.pdb
148K density_filtered/AF-A1YPR0-F1-model_v4.pdb


In [11]:
# Same selection with a lower threshold of 50, for comparison with the count at 70.
residues50 = set(find_high_confidence_residues(pdb_file, 50))
len(residues50)

275

In [12]:
# Same selection with a higher threshold of 90, for comparison with the count at 70.
residues90 = set(find_high_confidence_residues(pdb_file, 90))
len(residues90)

22

In [13]:
# Store the threshold-90 result next to the threshold-70 output; with_suffix
# swaps the trailing ".pdb" of out_file for ".90.pdb".
out_file90 = out_file.with_suffix(".90.pdb")
filter_out_low_confidence_residues(pdb_file, residues90, out_file90)

In [14]:
# List the original next to both filtered outputs (thresholds 70 and 90).
!ls -sh {pdb_file} {out_file} {out_file.with_suffix(".90.pdb")}

392K alphafold_files/AF-A1YPR0-F1-model_v4.pdb
 24K density_filtered/AF-A1YPR0-F1-model_v4.90.pdb
148K density_filtered/AF-A1YPR0-F1-model_v4.pdb
