# TED Domain Data Visualization Tutorial

This notebook demonstrates how to:
1. Fetch domain data from the TED API
2. Extract domain chopping information
3. Visualize the domains using molviewspec (mvs)

## Setup
First, let's import the required libraries

In [13]:
import requests
import molviewspec as mvs

## Step 1: Fetch TED Domain Data

We'll fetch domain data for UniProt ID Q8N9T8 from the TED API

In [14]:
# Define the UniProt ID
protein_id = "Q99683"

# Define the TED Metadata API URL
ted_url = f"https://ted.cathdb.info/api/v1/uniprot/summary/{protein_id}"

# Fetch the data
response = requests.get(ted_url)
ted_data = response.json()

print("Successfully fetched TED domain data!")
print(f"Number of domains found: {len(ted_data['data'])}")

Successfully fetched TED domain data!
Number of domains found: 7


## Step 2: Extract Domain Chopping Information

Let's extract and display the chopping information for each domain

In [15]:
# Extract chopping information
domain_choppings = []
for domain in ted_data['data']:
    domain_info = {
        'ted_id': domain['ted_id'],
        'chopping': domain['chopping'],
        'consensus_level': domain['consensus_level'],
        'plddt': domain['plddt'],
        'packing_density': domain['packing_density'],
        'norm_rg': domain['norm_rg'],
        'nres_domain': domain['nres_domain'],
        'cath_label': domain['cath_label'],
        'tax_scientific_name': domain['tax_scientific_name']
    }
    domain_choppings.append(domain_info)

# Display the information for the first domain
print("Domain Chopping Information:")
domain = domain_choppings[0]
print(f"\nDomain ID: {domain['ted_id']}")
print(f"Chopping: {domain['chopping']}")
print(f"Consensus Level: {domain['consensus_level']}")
print(f"pLDDT Score: {domain['plddt']:.2f}")

Domain Chopping Information:

Domain ID: AF-Q99683-F1-model_v4_TED01
Chopping: 93-271
Consensus Level: high
pLDDT Score: 76.80


We also need to get the reverse regions of the protein


In [16]:
def get_reverse_choppings(domain_choppings, protein_length):
    # Sort the choppings by start position
    sorted_choppings = sorted(domain_choppings, key=lambda x: int(x['chopping'].split('-')[0]))
    
    reverse_choppings = []
    current_pos = 0
    
    # Add regions between domains
    for domain in sorted_choppings:
        start, end = map(int, domain['chopping'].split('-'))
        
        # Add region before current domain if there's a gap
        if current_pos < start:
            reverse_choppings.append((current_pos, start))
        
        # Update current position to end of current domain
        current_pos = end
    
    # Add final region if there's space after the last domain
    if current_pos < protein_length:
        reverse_choppings.append((current_pos, protein_length))
    
    return reverse_choppings

# API call to get the chain length
chain_parsing_url = f"https://ted.cathdb.info/api/v1/uniprot/chainparse/{protein_id}"
response = requests.get(chain_parsing_url)
chain_parsing_data = response.json()
protein_length = chain_parsing_data['data'][0]['nres_chain']

reverse_regions = get_reverse_choppings(domain_choppings, protein_length)
print("Reverse regions:", reverse_regions)

Reverse regions: [(0, 93), (271, 304), (384, 559), (656, 672), (757, 758), (941, 1043), (1180, 1312), (1372, 1374)]


## Step 3: Visualize the whole protein

In [17]:
# Create a builder for visualization
builder = mvs.create_builder()

# For demonstration, we'll visualize the first domain
first_domain = domain_choppings[0]
protein_url = f"https://alphafold.ebi.ac.uk/files/AF-{protein_id}-F1-model_v4.cif"

model = (
    builder.download(url=protein_url, ref="download")
        .parse(format="mmcif")
        .model_structure()
)

# Display the domains
colors = ["#4e79a7", "#f28e2c", "#e15759", "#76b7b2", "#59a14f", "#edc949", "#af7aa1", "#ff9da7", "#9c755f", "#bab0ac"]
for domain_chopping, color in zip(domain_choppings, colors):
    # extract the "_TED0x" domain label
    label = domain_chopping["ted_id"].split("_")[-1]

    # extract the start-end of domain choppings
    start = int(domain_chopping['chopping'].split('-')[0])
    end = int(domain_chopping['chopping'].split('-')[1])

    _ = model.component(
        selector=dict(
            beg_label_seq_id=start,
            end_label_seq_id=end
        )
    ).label(text=label).representation().color(color=color)

# Display the rest of the protein
for reverse_chopping in reverse_regions:
    model.component(
        selector=dict(
            beg_label_seq_id=reverse_chopping[0],
            end_label_seq_id=reverse_chopping[1]
        )
    ).representation().color(color="grey")

builder

<IPython.core.display.Javascript object>

## Alternative: Visualize the domains as individual snapshots

In [38]:
from typing import Dict

def create_description(protein_id: str, ted_domain_chopping: Dict[str, str]) -> str:
    label = ted_domain_chopping["ted_id"].split("_")[-1]
    chopping = ted_domain_chopping["chopping"]
    residues = ted_domain_chopping["nres_domain"]
    plddt = ted_domain_chopping["plddt"]
    packing = ted_domain_chopping["packing_density"]
    globularity = ted_domain_chopping["norm_rg"]
    cath = ted_domain_chopping["cath_label"].split(',')[0]
    tax_scientific_name = ted_domain_chopping["tax_scientific_name"]
    description = f"""
#### Domain: [{protein_id}_{label}↗](https://ted.cathdb.info/uniprot/{protein_id})

#### Properties:
- **Chopping:** {chopping}
- **Residues:** {residues}
- **Average pLDDT:** {plddt}
- **Packing:** {packing}
- **Globularity:** {globularity}
- **Taxonomy:** {tax_scientific_name}"""
    if cath != "-":
        description += f"""
- **CATH:** [{cath}↗](https://www.cathdb.info/version/latest/cathnode/{cath})
"""
    else:
        description += f"""
- **CATH:** -
"""
    return description

def create_domain_snapshot(protein_id: str, ted_domain_chopping: Dict[str, str], color: str) -> mvs.State:
    builder = mvs.create_builder()
    
    model = (
        builder.download(url=f"https://alphafold.ebi.ac.uk/files/AF-{protein_id}-F1-model_v4.cif")
            .parse(format="mmcif")
            .model_structure()
    )

    # Display the domains
    label = ted_domain_chopping["ted_id"].split("_")[-1]
    start = int(ted_domain_chopping['chopping'].split('-')[0])
    end = int(ted_domain_chopping['chopping'].split('-')[1])

    model.component(
        selector=dict(
            beg_label_seq_id=start,
            end_label_seq_id=end
        )
    ).label(text=label).focus().representation().color(color=color)

    # Display the rest of the protein
    reverse_regions = [(0, start), (end, protein_length)]
    for reverse_chopping in reverse_regions:
        model.component(
            selector=dict(
                beg_label_seq_id=reverse_chopping[0],
                end_label_seq_id=reverse_chopping[1]
            )
        ).representation().color(color="grey")

    return builder.get_snapshot(title=f'{protein_id}_{label}', description=create_description(protein_id, ted_domain_chopping))

snapshots = []
colors = ["#4e79a7", "#f28e2c", "#e15759", "#76b7b2", "#59a14f", "#edc949", "#af7aa1", "#ff9da7", "#9c755f", "#bab0ac"]
for domain, color in zip(domain_choppings, colors):
    snapshots.append(create_domain_snapshot(protein_id, domain, color))

In [39]:
mvsj = mvs.MVSJ(
    data=mvs.States(snapshots=snapshots, metadata=mvs.GlobalMetadata(description="TED Domain Visualization"))
)
mvs.molstar_notebook(mvsj, ui="stories", width="100%", height=550)

<IPython.core.display.Javascript object>