<a href="https://colab.research.google.com/github/kattens/PubChem-Data-Handler/blob/main/Visualization_and_Alignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

in this notebook we will use py3mol package to get access to the protein sequences and 3d structure, we will also do alignment of the structure of 2 proteins as well to check the similarities between them.

In [1]:
import requests
import json

def get_pdb_info_from_uniprot(uniprot_id):
    """
    Retrieves PDB IDs associated with a UniProt ID from the UniProt API,
    and then retrieves specific information (resolution, length, etc.) about
    each PDB entry using the RCSB PDB API.

    Args:
        uniprot_id (str): The UniProt ID (e.g., P0DTC2).

    Returns:
        dict: A dictionary where keys are PDB IDs and values are dictionaries
              containing the extracted information (resolution, length, etc.).
              Returns an empty dictionary if no PDB IDs are found or if there's an error.
    """

    # Step 1: Get PDB IDs associated with the UniProt ID from UniProt API
    uniprot_url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}?format=json"

    try:
        response = requests.get(uniprot_url)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        data = response.json()

        pdb_ids = []
        for dbReference in data.get('uniProtKBCrossReferences', []):
            if dbReference.get('database') == 'PDB':
                pdb_ids.append(dbReference['id'])

        if not pdb_ids:
            print(f"No PDB entries found for UniProt ID: {uniprot_id}")
            return {}

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from UniProt for {uniprot_id}: {e}")
        return {}
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from UniProt for {uniprot_id}: {e}")
        return {}
    except KeyError as e:
        print(f"Error parsing UniProt JSON for {uniprot_id}: Missing key: {e}")
        return {}


    # Step 2: Get information about each PDB entry from RCSB PDB API
    pdb_info = {}
    for pdb_id in pdb_ids:
        pdb_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"  # Use RCSB API for PDB details

        try:
            pdb_response = requests.get(pdb_url)
            pdb_response.raise_for_status() # Raise HTTPError for bad responses
            pdb_data = pdb_response.json()

            # Extract the desired information.  Add more fields as needed.
            extracted_info = {
                "pdb_id": pdb_id,
                "resolution": None, # Initialize to None in case it's missing
                "length": None,
                "method": None,
                "chain_id": None,
                "entity_id": None,
                "seq_start": None,
                "seq_end": None,


                # Add more fields as needed. Find fields by inspecting the JSON
                # response from the RCSB PDB API (printed in previous example)
            }


            # Extract resolution
            if 'rcsb_entry_info' in pdb_data and 'resolution_combined' in pdb_data['rcsb_entry_info']:
                extracted_info["resolution"] = pdb_data['rcsb_entry_info']['resolution_combined'][0] # Taking the first resolution if there are multiple


            # Extract method
            if 'exptl' in pdb_data and pdb_data['exptl']:
                extracted_info['method'] = pdb_data['exptl'][0]['method']


            # Now we need to access the correct entity and chain
            if 'rcsb_entry_container_identifiers' in pdb_data:
               if pdb_data['rcsb_entry_container_identifiers']['entity_ids']:
                entity_id = pdb_data['rcsb_entry_container_identifiers']['entity_ids'][0]  #default the first one
                extracted_info['entity_id'] = entity_id

                # Extract chain ID and length associated with entity ID
                for entity in pdb_data.get("entities", []):
                    if entity['entity_id'] == entity_id:
                        extracted_info['length'] = entity['rcsb_entity_info']['entity_length']

                # Extract chain ID
                if 'rcsb_entry_container_identifiers' in pdb_data:
                   if pdb_data['rcsb_entry_container_identifiers']['assembly_ids']:
                       assembly_id = pdb_data['rcsb_entry_container_identifiers']['assembly_ids'][0]  #default the first one

                       for assembly in pdb_data.get("assemblies", []):
                            if assembly['assembly_id'] == assembly_id:
                                for chain in assembly.get("rcsb_assembly_container_identifiers", {}).get("auth_asym_ids",[]):
                                   extracted_info['chain_id'] = chain   #default the first one

                # Get the range of the protein
                for struct_ref in pdb_data.get("struct_ref", []):
                    for seq_range in struct_ref.get("seq_range", []):
                         extracted_info['seq_start'] = seq_range["beg_seq_id"]
                         extracted_info['seq_end'] = seq_range["end_seq_id"]


            pdb_info[pdb_id] = extracted_info


        except requests.exceptions.RequestException as e:
            print(f"Error fetching data from RCSB PDB for {pdb_id}: {e}")
            pdb_info[pdb_id] = {"error": f"Error: {e}"} # Store error information
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from RCSB PDB for {pdb_id}: {e}")
            pdb_info[pdb_id] = {"error": f"JSON decode error: {e}"}
        except Exception as e:
            print(f"Unexpected error processing PDB ID {pdb_id}: {e}")
            pdb_info[pdb_id] = {"error": f"Unexpected error: {e}"}


    return pdb_info


# Example usage:
uniprot_id = "P00533"  # UniProt API endpoint for the P00533 entry
pdb_data = get_pdb_info_from_uniprot(uniprot_id)

if pdb_data:
    for pdb_id, info in pdb_data.items():
        print(f"\nInformation for PDB ID: {pdb_id}")
        if "error" in info:
            print(f"  Error: {info['error']}")
        else:
            print(json.dumps(info, indent=2)) # Nicely formatted JSON output
else:
    print("No PDB information found.")




Information for PDB ID: 1IVO
{
  "pdb_id": "1IVO",
  "resolution": 3.3,
  "length": null,
  "method": "X-RAY DIFFRACTION",
  "chain_id": null,
  "entity_id": "1",
  "seq_start": null,
  "seq_end": null
}

Information for PDB ID: 1M14
{
  "pdb_id": "1M14",
  "resolution": 2.6,
  "length": null,
  "method": "X-RAY DIFFRACTION",
  "chain_id": null,
  "entity_id": "1",
  "seq_start": null,
  "seq_end": null
}

Information for PDB ID: 1M17
{
  "pdb_id": "1M17",
  "resolution": 2.6,
  "length": null,
  "method": "X-RAY DIFFRACTION",
  "chain_id": null,
  "entity_id": "1",
  "seq_start": null,
  "seq_end": null
}

Information for PDB ID: 1MOX
{
  "pdb_id": "1MOX",
  "resolution": 2.5,
  "length": null,
  "method": "X-RAY DIFFRACTION",
  "chain_id": null,
  "entity_id": "1",
  "seq_start": null,
  "seq_end": null
}

Information for PDB ID: 1NQL
{
  "pdb_id": "1NQL",
  "resolution": 2.8,
  "length": null,
  "method": "X-RAY DIFFRACTION",
  "chain_id": null,
  "entity_id": "1",
  "seq_start": n

In [2]:
import requests
import json

def get_pdb_info_from_uniprot(uniprot_id):
    """
    Retrieves PDB IDs associated with a UniProt ID from the UniProt API,
    and then retrieves specific information (resolution, length, etc.) about
    each PDB entry using the RCSB PDB API, formatting the output as a dictionary
    with the structure you specified.

    Args:
        uniprot_id (str): The UniProt ID (e.g., P0DTC2).

    Returns:
        dict: A dictionary with PDB IDs as keys and dictionaries of structured
              information as values, formatted as you requested.
              Returns an empty dictionary if no PDB IDs are found or if there's an error.
    """

    # Step 1: Get PDB IDs associated with the UniProt ID from UniProt API
    uniprot_url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}?format=json"

    try:
        response = requests.get(uniprot_url)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        data = response.json()

        pdb_ids = []
        for dbReference in data.get('uniProtKBCrossReferences', []):
            if dbReference.get('database') == 'PDB':
                pdb_ids.append(dbReference['id'])

        if not pdb_ids:
            print(f"No PDB entries found for UniProt ID: {uniprot_id}")
            return {}

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from UniProt for {uniprot_id}: {e}")
        return {}
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from UniProt for {uniprot_id}: {e}")
        return {}
    except KeyError as e:
        print(f"Error parsing UniProt JSON for {uniprot_id}: Missing key: {e}")
        return {}


    # Step 2: Get information about each PDB entry from RCSB PDB API
    pdb_info = {}
    for pdb_id in pdb_ids:
        pdb_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"  # Use RCSB API for PDB details

        try:
            pdb_response = requests.get(pdb_url)
            pdb_response.raise_for_status() # Raise HTTPError for bad responses
            pdb_data = pdb_response.json()


            # Initialize the dictionary for this PDB ID
            pdb_info[pdb_id] = {
                "method": None,
                "resolution": None,
                "chains": []  # Initialize the chains list
            }

            # Extract method
            if 'exptl' in pdb_data and pdb_data['exptl']:
                pdb_info[pdb_id]["method"] = pdb_data['exptl'][0]['method']

            # Extract resolution
            if 'rcsb_entry_info' in pdb_data and 'resolution_combined' in pdb_data['rcsb_entry_info']:
                pdb_info[pdb_id]["resolution"] = pdb_data['rcsb_entry_info']['resolution_combined'][0]

            # Extract chain information and sequence range
            chain_id = None
            seq_start = None
            seq_end = None

            if 'rcsb_entry_container_identifiers' in pdb_data:
               if pdb_data['rcsb_entry_container_identifiers']['entity_ids']:
                    entity_id = pdb_data['rcsb_entry_container_identifiers']['entity_ids'][0]

                    #Extract chain ID
                    if 'rcsb_entry_container_identifiers' in pdb_data:
                       if pdb_data['rcsb_entry_container_identifiers']['assembly_ids']:
                           assembly_id = pdb_data['rcsb_entry_container_identifiers']['assembly_ids'][0]  #default the first one

                           for assembly in pdb_data.get("assemblies", []):
                                if assembly['assembly_id'] == assembly_id:
                                    for chain in assembly.get("rcsb_assembly_container_identifiers", {}).get("auth_asym_ids",[]):
                                       chain_id = chain   #default the first one

                    # Get the range of the protein
                    for struct_ref in pdb_data.get("struct_ref", []):
                        for seq_range in struct_ref.get("seq_range", []):
                            seq_start = seq_range["beg_seq_id"]
                            seq_end = seq_range["end_seq_id"]

                    # Add the chain information to the chains list
                    if chain_id is not None and seq_start is not None and seq_end is not None:
                        pdb_info[pdb_id]["chains"].append({
                            "chain": chain_id,
                            "start": seq_start,
                            "finish": seq_end
                        })


        except requests.exceptions.RequestException as e:
            print(f"Error fetching data from RCSB PDB for {pdb_id}: {e}")
            pdb_info[pdb_id] = {"error": f"Error: {e}"} # Store error information
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from RCSB PDB for {pdb_id}: {e}")
            pdb_info[pdb_id] = {"error": f"JSON decode error: {e}"}
        except Exception as e:
            print(f"Unexpected error processing PDB ID {pdb_id}: {e}")
            pdb_info[pdb_id] = {"error": f"Unexpected error: {e}"}


    return pdb_info


# Example usage:
uniprot_id = "P00533"  # Example -> this is the example that was working very fine
pdb_data = get_pdb_info_from_uniprot(uniprot_id)

if pdb_data:
    print(json.dumps(pdb_data, indent=4))  # Print the structured output
else:
    print("No PDB information found.")

{
    "1IVO": {
        "method": "X-RAY DIFFRACTION",
        "resolution": 3.3,
        "chains": []
    },
    "1M14": {
        "method": "X-RAY DIFFRACTION",
        "resolution": 2.6,
        "chains": []
    },
    "1M17": {
        "method": "X-RAY DIFFRACTION",
        "resolution": 2.6,
        "chains": []
    },
    "1MOX": {
        "method": "X-RAY DIFFRACTION",
        "resolution": 2.5,
        "chains": []
    },
    "1NQL": {
        "method": "X-RAY DIFFRACTION",
        "resolution": 2.8,
        "chains": []
    },
    "1XKK": {
        "method": "X-RAY DIFFRACTION",
        "resolution": 2.4,
        "chains": []
    },
    "1YY9": {
        "method": "X-RAY DIFFRACTION",
        "resolution": 2.605,
        "chains": []
    },
    "1Z9I": {
        "method": "SOLUTION NMR",
        "resolution": null,
        "chains": []
    },
    "2EB2": {
        "method": "X-RAY DIFFRACTION",
        "resolution": 2.5,
        "chains": []
    },
    "2EB3": {
        "metho

In [None]:
# After the get_pdb_info_from_uniprot function and its usage:

def get_top_n_resolution(pdb_data, n=3):
    """
    Finds the top N PDB entries with the best (highest) resolution.

    Args:
        pdb_data (dict): The dictionary returned by get_pdb_info_from_uniprot.
        n (int): The number of top entries to return (default: 3).

    Returns:
        list: A list of tuples, where each tuple contains (PDB ID, resolution).
              Sorted by resolution in ascending order (best resolution first).
    """

    resolution_list = []
    for pdb_id, info in pdb_data.items():
        if "error" not in info and info["resolution"] is not None:
            try:
                resolution = info["resolution"]
                if isinstance(resolution, str):
                    resolution = float(resolution.replace("Å", ""))  # Remove Å and convert to float if it's a string
                elif isinstance(resolution, (int, float)):
                   pass  #It's already a number.
                else:
                    print(f"Warning: Unexpected resolution type for PDB ID: {pdb_id}, skipping.")
                    continue

                resolution_list.append((pdb_id, resolution))
            except ValueError:
                print(f"Warning: Could not convert resolution to float for PDB ID: {pdb_id}, skipping.")

    # Sort by resolution (lower is better)
    resolution_list.sort(key=lambda x: x[1])

    return resolution_list[:n]  # Return the top N


# Get the top 3 PDBs by resolution:
if pdb_data:  # Ensure pdb_data is not empty
    top_3 = get_top_n_resolution(pdb_data, n=3)

    print("\nTop 3 PDBs by Resolution:")
    for pdb_id, resolution in top_3:
        print(f"  {pdb_id}: {resolution:.2f} Å") # Format resolution nicely
else:
    print("No PDB data available to find top resolutions.")


Top 3 PDBs by Resolution:
  8A27: 1.07 Å
  8A2D: 1.11 Å
  5UG9: 1.33 Å


In [None]:
# Example usage:
uniprot_id = "A0A0A7RC34"  # Example: Spike glycoprotein from SARS-CoV-2
pdb_data = get_pdb_info_from_uniprot(uniprot_id)

if pdb_data:
    print(json.dumps(pdb_data, indent=4))  # Print the structured output
else:
    print("No PDB information found.")

No PDB entries found for UniProt ID: A0A0A7RC34
No PDB information found.


In [None]:
!pip install biopython
!pip install py3Dmol #for visualization

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85
Collecting py3Dmol
  Downloading py3Dmol-2.4.2-py2.py3-none-any.whl.metadata (1.9 kB)
Downloading py3Dmol-2.4.2-py2.py3-none-any.whl (7.0 kB)
Installing collected packages: py3Dmol
Successfully installed py3Dmol-2.4.2


In [None]:
#We will import all the libraries here but also will add each one in the related block too.
import requests
from Bio.PDB import PDBParser,PPBuilder, PDBList,Superimposer, PDBIO, Select
import py3Dmol

In [None]:
!wget -O 8A27.pdb https://files.rcsb.org/download/8A27.pdb
!wget -O 5UG9.pdb https://files.rcsb.org/download/5UG9.pdb

--2025-03-03 22:56:54--  https://files.rcsb.org/download/8A27.pdb
Resolving files.rcsb.org (files.rcsb.org)... 3.166.135.84, 3.166.135.85, 3.166.135.67, ...
Connecting to files.rcsb.org (files.rcsb.org)|3.166.135.84|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘8A27.pdb’

8A27.pdb                [ <=>                ] 482.92K  --.-KB/s    in 0.06s   

2025-03-03 22:56:55 (7.51 MB/s) - ‘8A27.pdb’ saved [494505]

--2025-03-03 22:56:55--  https://files.rcsb.org/download/5UG9.pdb
Resolving files.rcsb.org (files.rcsb.org)... 3.166.135.84, 3.166.135.85, 3.166.135.67, ...
Connecting to files.rcsb.org (files.rcsb.org)|3.166.135.84|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘5UG9.pdb’

5UG9.pdb                [ <=>                ] 255.10K  --.-KB/s    in 0.05s   

2025-03-03 22:56:56 (4.57 MB/s) - ‘5UG9.pdb’ saved [261225]



In [None]:
import py3Dmol

pdb_id = "8A27"  # same example as part 2
view = py3Dmol.view(query=f'pdb:{pdb_id}', options={'doAssembly': True})
view.setStyle({'cartoon': {'color': 'spectrum'}})
view.addStyle({'model': -1}, {'stick': {}})
view.zoomTo()
view.show()


In [None]:
import requests
import json

def search_pdb_for_plasmodium_falciparum():
    """
    Searches the RCSB PDB for structures related to Plasmodium falciparum using
    a keyword search. This approach avoids relying on taxonomy IDs, which may
    have annotation issues.
    """

    search_term = "Plasmodium falciparum"
    pdb_search_url = f"https://search.rcsb.org/graphql?query={{search(terms:\"{search_term}\",return_type:entry)}}"

    try:
        response = requests.get(pdb_search_url)
        response.raise_for_status()  # Raise HTTPError for bad responses
        search_data = response.json()

        if not search_data or not search_data.get("data") or not search_data["data"].get("search"):
            print("No search results found for Plasmodium falciparum.")
            return {} # Return an empty dict instead of list

        pdb_ids = search_data["data"]["search"]
        if not pdb_ids:
             print("No PDB IDs found for Plasmodium falciparum.")
             return {} # Return an empty dict instead of list

        print (f"Found those pdb {pdb_ids}")

        # Fetch details for each PDB ID
        pdb_details = {}
        for pdb_id in pdb_ids:
             pdb_details[pdb_id] = get_pdb_details(pdb_id) #call function to return pdb details
        return pdb_details


    except requests.exceptions.RequestException as e:
        print(f"Error during RCSB PDB search: {e}")
        return {} # Return an empty dict instead of list
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from RCSB PDB search: {e}")
        return {} # Return an empty dict instead of list
    except KeyError as e:
        print(f"Error parsing JSON from RCSB PDB search: {e}")
        return {} # Return an empty dict instead of list

def get_pdb_details(pdb_id):

    """
    Retrieves details for a given PDB ID from the RCSB PDB API.

    Args:
        pdb_id (str): The PDB ID (e.g., "1IVO").

    Returns:
        dict: A dictionary containing details about the PDB entry,
              or an empty dictionary if there was an error.
    """

    pdb_url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"  # Use RCSB API for PDB details

    try:
            pdb_response = requests.get(pdb_url)
            pdb_response.raise_for_status() # Raise HTTPError for bad responses
            pdb_data = pdb_response.json()


            extracted_info = {
                "pdb_id": pdb_id,
                "resolution": None, # Initialize to None in case it's missing
                "length": None,
                "method": None,
                "chain_id": None,
                "entity_id": None,
                "seq_start": None,
                "seq_end": None,
                # Add more fields as needed. Find fields by inspecting the JSON
                # response from the RCSB PDB API (printed in previous example)
            }


            # Extract resolution
            if 'rcsb_entry_info' in pdb_data and 'resolution_combined' in pdb_data['rcsb_entry_info']:
                extracted_info["resolution"] = pdb_data['rcsb_entry_info']['resolution_combined'][0] # Taking the first resolution if there are multiple


            # Extract method
            if 'exptl' in pdb_data and pdb_data['exptl']:
                extracted_info['method'] = pdb_data['exptl'][0]['method']


            # Now we need to access the correct entity and chain
            if 'rcsb_entry_container_identifiers' in pdb_data:
               if pdb_data['rcsb_entry_container_identifiers']['entity_ids']:
                entity_id = pdb_data['rcsb_entry_container_identifiers']['entity_ids'][0]  #default the first one
                extracted_info['entity_id'] = entity_id

                # Extract chain ID and length associated with entity ID
                for entity in pdb_data.get("entities", []):
                    if entity['entity_id'] == entity_id:
                        extracted_info['length'] = entity['rcsb_entity_info']['entity_length']

                # Extract chain ID
                if 'rcsb_entry_container_identifiers' in pdb_data:
                   if pdb_data['rcsb_entry_container_identifiers']['assembly_ids']:
                       assembly_id = pdb_data['rcsb_entry_container_identifiers']['assembly_ids'][0]  #default the first one

                       for assembly in pdb_data.get("assemblies", []):
                            if assembly['assembly_id'] == assembly_id:
                                for chain in assembly.get("rcsb_assembly_container_identifiers", {}).get("auth_asym_ids",[]):
                                   extracted_info['chain_id'] = chain   #default the first one

                # Get the range of the protein
                for struct_ref in pdb_data.get("struct_ref", []):
                    for seq_range in struct_ref.get("seq_range", []):
                         extracted_info['seq_start'] = seq_range["beg_seq_id"]
                         extracted_info['seq_end'] = seq_range["end_seq_id"]

            return extracted_info
    except requests.exceptions.RequestException as e:
            print(f"Error fetching data from RCSB PDB for {pdb_id}: {e}")
            return {}
    except json.JSONDecodeError as e:
            print(f"Error decoding JSON from RCSB PDB for {pdb_id}: {e}")
            return {}
    except Exception as e:
            print(f"Unexpected error processing PDB ID {pdb_id}: {e}")
            return {}


def search_alphafold_for_plasmodium_falciparum():
    """
    Attempts to search the AlphaFold database for Plasmodium falciparum structures.
    This is a simplified approach and may not be reliable as direct programmatic
    access to the AlphaFold database is limited. It relies on precomputed data
    available from RCSB PDB, which includes AlphaFold predictions.
    """

    search_term = "Plasmodium falciparum AND experimental:false"  # Focus on AlphaFold
    pdb_search_url = f"https://search.rcsb.org/graphql?query={{search(terms:\"{search_term}\",return_type:entry)}}"

    try:
        response = requests.get(pdb_search_url)
        response.raise_for_status()
        search_data = response.json()

        if not search_data or not search_data.get("data") or not search_data["data"].get("search"):
            print("No AlphaFold results found for Plasmodium falciparum.")
            return {}  #Empty dict

        pdb_ids = search_data["data"]["search"]
        if not pdb_ids:
             print("No AlphaFold IDs found for Plasmodium falciparum.")
             return {}   #Empty dict


        print (f"Found those pdb {pdb_ids}")

        # Fetch details for each PDB ID
        pdb_details = {}
        for pdb_id in pdb_ids:
             pdb_details[pdb_id] = get_pdb_details(pdb_id) #call function to return pdb details
        return pdb_details



    except requests.exceptions.RequestException as e:
        print(f"Error during RCSB PDB AlphaFold search: {e}")
        return {} # Empty dict
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from RCSB PDB AlphaFold search: {e}")
        return {}  # Empty dict
    except KeyError as e:
        print(f"Error parsing JSON from RCSB PDB AlphaFold search: {e}")
        return {}  # Empty dict


# Main Execution
print("Searching RCSB PDB for experimentally determined structures:")
pdb_results = search_pdb_for_plasmodium_falciparum()

if pdb_results:
    print("PDB Entries related to Plasmodium falciparum:")
    print(json.dumps(pdb_results, indent=4))
else:
    print("No PDB entries found for Plasmodium falciparum using RCSB PDB search.")

print("\nSearching RCSB PDB for AlphaFold predicted structures:")
alphafold_results = search_alphafold_for_plasmodium_falciparum()

if alphafold_results:
    print("AlphaFold Entries related to Plasmodium falciparum:")
    print(json.dumps(alphafold_results, indent=4))
else:
    print("No AlphaFold entries found for Plasmodium falciparum using RCSB PDB search.")

Searching RCSB PDB for experimentally determined structures:
Error during RCSB PDB search: 404 Client Error:  for url: https://search.rcsb.org/graphql?query=%7Bsearch(terms:%22Plasmodium%20falciparum%22,return_type:entry)%7D
No PDB entries found for Plasmodium falciparum using RCSB PDB search.

Searching RCSB PDB for AlphaFold predicted structures:
Error during RCSB PDB AlphaFold search: 404 Client Error:  for url: https://search.rcsb.org/graphql?query=%7Bsearch(terms:%22Plasmodium%20falciparum%20AND%20experimental:false%22,return_type:entry)%7D
No AlphaFold entries found for Plasmodium falciparum using RCSB PDB search.


In [None]:
taxonomy_id = 5833  # Example: Homo sapiens

In [None]:
taxonomy_id = 36329  # Example: Plasmodium falciparum

In [None]:
path_to_5833 = '/content/AF-A0A0A7RC34-F1-model_v4.pdb'


In [None]:
# Specify downloaded PDB files
pdb_file1 = "5UG9.pdb"  # Ubiquitin
pdb_file2 = path_to_5833  # BPTI

# Load the structures
# Import Bio.PDB
import Bio.PDB

parser = Bio.PDB.PDBParser(QUIET=True) # Use Bio.PDB.PDBParser
structure1 = parser.get_structure("Protein1", pdb_file1)
structure2 = parser.get_structure("Protein2", pdb_file2)

# Select chains and get only matching residues
model1 = structure1[0]
model2 = structure2[0]

chain1 = list(model1.get_chains())[0]  # Get the first chain
chain2 = list(model2.get_chains())[0]  # Get the first chain

# Get CA atoms from only the matching residues
atoms1 = []
atoms2 = []

for res1, res2 in zip(chain1.get_residues(), chain2.get_residues()):
    if res1.has_id('CA') and res2.has_id('CA'):
        atoms1.append(res1['CA'])
        atoms2.append(res2['CA'])

# Check if atoms were found before aligning
if not atoms1 or not atoms2:
    raise ValueError("No matching alpha carbon atoms (CA) found in the provided chains.")

# Perform the alignment
super_imposer = Bio.PDB.Superimposer() # Use Bio.PDB.Superimposer
super_imposer.set_atoms(atoms1, atoms2)
super_imposer.apply(structure2.get_atoms())

# Prepare PDB strings for visualization
io = Bio.PDB.PDBIO() # Use Bio.PDB.PDBIO
io.set_structure(structure1)
with open("aligned1.pdb", "w") as f:
    io.save(f)

io.set_structure(structure2)
with open("aligned2.pdb", "w") as f:
    io.save(f)

# Visualize the alignment using py3Dmol
view = py3Dmol.view(width=800, height=600)
with open("aligned1.pdb", "r") as f:
    view.addModel(f.read(), "pdb")
    view.setStyle({'model': 0}, {'cartoon': {'color': 'blue'}})

with open("aligned2.pdb", "r") as f:
    view.addModel(f.read(), "pdb")
    view.setStyle({'model': 1}, {'cartoon': {'color': 'red'}})

view.zoomTo()
view.show()