In [5]:
import os
from Bio import ExPASy
from Bio import SwissProt
from Bio.PDB import PDBList, PDBParser
import requests
import pandas as pd

In [2]:

# Function to fetch the PDB ID with the best resolution for a given UniProt ID
def get_best_pdb_id(uniprot_id, timeout=30):
    """Return the PDB entry with the lowest resolution value for a UniProt ID.

    Sends a GET request to the PDBe ``best_structures`` mapping endpoint and
    picks, among the entries listed under ``uniprot_id`` in the JSON response,
    the one whose ``resolution`` value is numerically smallest.

    Parameters
    ----------
    uniprot_id : str
        Accession inserted into the request URL and used as the key into the
        JSON response.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout`` so a stalled
        connection cannot block the caller forever (default 30).

    Returns
    -------
    tuple
        ``(pdb_id, resolution)`` of the selected entry, or ``(None, None)``
        when the HTTP status is not 200, the accession is absent from the
        response, or its list of entries is empty. ``resolution`` is ``None``
        when no entry carries a resolution value.
    """
    url = f"https://www.ebi.ac.uk/pdbe/api/mappings/best_structures/{uniprot_id}"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None, None

    data = response.json()
    if uniprot_id not in data:
        return None, None

    entries = data[uniprot_id]
    if not entries:
        # Nothing to choose from; indexing an empty list would raise IndexError.
        return None, None

    def _resolution_key(entry):
        # None cannot be ordered against numbers in Python 3, so entries
        # without a resolution value are ranked after every resolved entry.
        resolution = entry.get('resolution')
        return float("inf") if resolution is None else resolution

    # min() returns the first minimal entry, matching a stable sort's [0].
    best_structure = min(entries, key=_resolution_key)
    return best_structure['pdb_id'], best_structure.get('resolution')

In [8]:
# Load the two CSV tables that supply the UniProt IDs to query.
# NOTE(review): both paths are absolute and machine-specific — consider a
# configurable base directory so the notebook runs elsewhere.
AF_structs = pd.read_csv(
    "/Users/talgalper/Documents/GitHub/PhD-MOC/Druggability_analysis/Fpocket/results_2024.05/fpocket_druggability.csv"
)
AF_low_conf_structs = pd.read_csv(
    "/Users/talgalper/Documents/GitHub/PhD-MOC/Druggability_analysis/Fpocket/results_2024.05/af_low_conf_struct.csv"
)

# Stack the "uniprot_id" column of both tables into one Series.
# Row labels from each table are kept as-is and repeated IDs are not removed.
uniprot_ids = pd.concat(
    [table["uniprot_id"] for table in (AF_structs, AF_low_conf_structs)]
)

In [4]:

# Main function to process a list of UniProt IDs
def download_structures(uniprot_ids, output_dir):
    """Download the best-resolution PDB structure for each UniProt ID.

    For every distinct ID, ``get_best_pdb_id`` is asked for a PDB entry; when
    one is returned it is handed to ``download_pdb`` together with
    ``output_dir``, otherwise a "No structure found" message is printed.

    Parameters
    ----------
    uniprot_ids : iterable
        UniProt accessions to process. Repeated values are processed only
        once, in the order of their first occurrence, so the same lookup and
        download are not performed several times.
    output_dir : str
        Directory passed to ``download_pdb``; created (including missing
        parent directories) if it does not exist yet.

    Returns
    -------
    None
    """
    # exist_ok=True replaces the separate existence check, which could race
    # with another process creating the directory in between.
    os.makedirs(output_dir, exist_ok=True)

    # dict.fromkeys drops repeated IDs while preserving first-seen order.
    for uniprot_id in dict.fromkeys(uniprot_ids):
        pdb_id, resolution = get_best_pdb_id(uniprot_id)
        if pdb_id:
            print(f"Downloading PDB {pdb_id} for UniProt ID {uniprot_id} (Resolution: {resolution} Å)")
            # NOTE(review): download_pdb is not defined in the visible cells —
            # confirm it is defined before this function is called.
            download_pdb(pdb_id, output_dir)
        else:
            print(f"No structure found for UniProt ID {uniprot_id}")

# Entry point: set the destination folder, then process every collected UniProt ID.
if __name__ == "__main__":
    # Directory handed to download_structures as its output location.
    # NOTE(review): absolute, machine-specific path — consider making it configurable.
    output_dir = "/Users/talgalper/OneDrive - RMIT University/PhD/structures/PDB_query/"

    download_structures(uniprot_ids=uniprot_ids, output_dir=output_dir)
