<a href="https://colab.research.google.com/github/tozaktas/mpxv-mutations/blob/master/monkeypox_mutations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Comprehensive Mutational Landscape Analysis of Monkeypox Virus Proteome
This Colab notebook lets you interactively visualize the full mutational landscape of each MPXV protein. You can also project the AMMS (Average Minmaxed Mutation Score) onto the protein structures to identify mutation-sensitive positions that merit further investigation.

In [None]:
#@title Install necessary libraries and get list of MPXV proteins. {run: "auto"}
!pip install plotly pandas py3Dmol

# Import the libraries
import pandas as pd
import py3Dmol
import re
import plotly.graph_objects as go
import plotly.express as px
import ipywidgets as widgets
import numpy as np

# Get the data.
!wget https://zenodo.org/records/13736892/files/genes-list-detailed.csv

--2025-02-04 13:21:36--  https://zenodo.org/records/13736892/files/genes-list-detailed.csv
Resolving zenodo.org (zenodo.org)... 188.185.43.25, 188.185.48.194, 188.185.45.92, ...
Connecting to zenodo.org (zenodo.org)|188.185.43.25|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8845 (8.6K) [text/plain]
Saving to: ‘genes-list-detailed.csv.1’


2025-02-04 13:21:37 (163 KB/s) - ‘genes-list-detailed.csv.1’ saved [8845/8845]



In [None]:
#@title Select the method (ESCOTT or iGEMME). Then, execute the cell. {run: "auto"}

method = "ESCOTT" #@param ["ESCOTT", "iGEMME"]

# Get and extract the compressed data.
selected_results="all-mutations-with-"+method.lower()+".tgz"
!wget https://zenodo.org/records/13736892/files/$selected_results
!tar xzvf $selected_results

--2025-02-04 13:21:43--  https://zenodo.org/records/13736892/files/all-mutations-with-escott.tgz
Resolving zenodo.org (zenodo.org)... 188.185.43.25, 188.185.48.194, 188.185.45.92, ...
Connecting to zenodo.org (zenodo.org)|188.185.43.25|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 181694094 (173M) [application/octet-stream]
Saving to: ‘all-mutations-with-escott.tgz.1’


2025-02-04 13:21:52 (19.1 MB/s) - ‘all-mutations-with-escott.tgz.1’ saved [181694094/181694094]

all-mutations-with-escott/
all-mutations-with-escott/OPG093/
all-mutations-with-escott/OPG093/OPG093_b823e_unrelaxed_rank_001_alphafold2_ptm_model_1_seed_000.pdb.dssp
all-mutations-with-escott/OPG093/OPG093_b823e_unrelaxed_rank_001_alphafold2_ptm_model_1_seed_000.pdb.dssp.new
all-mutations-with-escott/OPG093/aliOPG093.fasta
all-mutations-with-escott/OPG093/OPG093.fasta
all-mutations-with-escott/OPG093/reformat.log
all-mutations-with-escott/OPG093/OPG093_normPred_evolCombi_1-minmax_singleline.txt
a

In [None]:
#@title Run this cell and then, select your protein. {run: "auto"}
# Gene identifiers are taken from the 'Gene' column of the downloaded gene list.
genes_table = pd.read_csv("genes-list-detailed.csv")
params_list = genes_table['Gene'].to_list()

# Dropdown used by the following cells to pick the protein; starts on 'OPG001'.
proteinPicker = widgets.Dropdown(
    value='OPG001',
    options=params_list,
)

# Bare last expression so the widget is rendered as the cell output.
proteinPicker

Dropdown(options=('OPG001', 'OPG002', 'OPG005', 'OPG015', 'OPG016', 'OPG019', 'OPG021', 'OPG022', 'OPG023', 'O…

In [None]:
#@title Plot full mutational landscape of the protein as a heatmap in viridis colors {run: "auto"}
# Snapshot the current dropdown selection for this cell.
selectedProtein = proteinPicker.value

# Score table of the selected protein for the selected method. The path is relative
# to the working directory, which is where the archive was downloaded and extracted
# (the gene list is read with a relative path as well).
scores_csv = (
    f"all-mutations-with-{method.lower()}/{selectedProtein}/"
    f"{selectedProtein}_normPred_evolCombiTransposedRanksorted.csv"
)

# Read the CSV file into a pandas DataFrame (transposed after loading).
try:
    df = pd.read_csv(scores_csv, index_col=0).T
except FileNotFoundError:
    # Report the problem and skip plotting. Calling exit() here would ask the
    # notebook kernel to shut down instead of merely stopping this cell.
    print("Error: File not found. Please make sure the file exists at the specified path.")
else:
    # Create the heatmap using plotly; the colour range is pinned to [0, 1].
    fig = px.imshow(
        df,
        labels=dict(x="Positions", y="Variants", color=method + " Score"),
        x=df.columns,
        y=df.index,
        color_continuous_scale='viridis_r',
        zmin=0.0,
        zmax=1.0,
    )

    # Customize the heatmap (optional)
    fig.update_layout(
        title="Interactive Heatmap of " + selectedProtein,
        xaxis_title="Positions",
        yaxis_title="Variants",
    )

    # Show the plot
    fig.show()


In [None]:
#@title Project AMMS on 3D structure of the selected protein. Dark colors indicate mutation sensitive positions. {run: "auto"}

# Build the structure path from the method chosen in the form above instead of always
# pointing at the ESCOTT results; otherwise selecting iGEMME either fails (only the
# iGEMME archive was extracted) or shows ESCOTT data. The path is relative to the
# working directory, where the archive was extracted.
# NOTE(review): assumes the iGEMME archive ships the same
# '*_with_average_sensitivity_at_occupancy_column.pdb' files — confirm.
pdb_file = (
    f"all-mutations-with-{method.lower()}/{selectedProtein}/"
    f"{selectedProtein}_with_average_sensitivity_at_occupancy_column.pdb"
)

import py3Dmol
import numpy as np
import matplotlib

def visualize_protein_by_occupancy(pdb_file):
    """Build a py3Dmol cartoon view coloured per residue by the PDB occupancy column.

    The occupancy of the first ATOM/HETATM record seen for each residue number is
    min-max normalised over the structure and mapped onto the reversed viridis
    colormap, so the lowest value is drawn in yellow and the highest in dark purple.

    Args:
        pdb_file: Path to a PDB file whose occupancy column (characters 55-60 of
            each ATOM/HETATM record) holds the per-residue score.

    Returns:
        A ``py3Dmol.view`` (800x600, white background, zoomed to the model). The
        caller is responsible for calling ``show()``.

    Raises:
        ValueError: If the file contains no ATOM/HETATM records, or a residue
            number / occupancy field cannot be parsed.
        OSError: If the file cannot be opened.
    """
    # Read the file once; the same text is parsed below and handed to the viewer,
    # and the handle is closed deterministically.
    with open(pdb_file, 'r') as f:
        pdb_text = f.read()

    # Collect one occupancy per residue number, in order of first appearance.
    # Residues are keyed by number only (chain IDs are not considered).
    occupancies = []
    residue_nums = []
    seen_residues = set()  # O(1) membership instead of scanning the list per atom
    for line in pdb_text.splitlines():
        if not line.startswith(('ATOM', 'HETATM')):
            continue
        residue_num = int(line[22:26].strip())
        if residue_num in seen_residues:
            continue
        seen_residues.add(residue_num)
        residue_nums.append(residue_num)
        occupancies.append(float(line[54:60].strip()))

    if not occupancies:
        raise ValueError(f"No ATOM/HETATM records found in {pdb_file!r}")

    # Convert occupancies to normalized values (0 to 1).
    occupancies = np.array(occupancies)
    occ_min = occupancies.min()
    occ_range = occupancies.max() - occ_min
    if occ_range > 0:
        normalized_occ = (occupancies - occ_min) / occ_range
    else:
        # All residues share one value: avoid 0/0 (NaN colours would make the hex
        # conversion below fail) and colour everything as the lowest value.
        normalized_occ = np.zeros_like(occupancies)

    # Create color mapping using viridis; 1 - val reverses the colormap so that
    # low values are yellow and high values are dark.
    viridis = matplotlib.colormaps.get_cmap('viridis')
    hex_colors = []
    for val in normalized_occ:
        r, g, b, _ = viridis(1 - val)
        hex_colors.append('0x{:02x}{:02x}{:02x}'.format(
            int(255*r), int(255*g), int(255*b)))

    # Initialize viewer and load the model from the text read above.
    view = py3Dmol.view(width=800, height=600)
    view.addModel(pdb_text, "pdb")

    # Colour the cartoon residue by residue.
    # NOTE(review): the previous version followed each setStyle with an identical
    # addStyle call carrying a third "hover text" argument. 3Dmol.js documents
    # addStyle as (sel, style) only, so that call appears to have been a redundant
    # restyle with no hover effect; real hover labels would need view.setHoverable
    # with JavaScript callbacks — confirm before relying on hover.
    for resnum, color in zip(residue_nums, hex_colors):
        view.setStyle({'resi': resnum}, {'cartoon': {'color': color}})

    # Set other view parameters
    view.zoomTo()
    view.setBackgroundColor('white')

    return view

# Render the structure of the selected protein and tag the scene with its name.
# The label is placed at a fixed position in model coordinates.
label_options = {
    'position': {'x': -40, 'y': 40, 'z': 0},
    'fontColor': 'blue',
    'backgroundColor': 'lightgray',
}

view = visualize_protein_by_occupancy(pdb_file)
view.addLabel(selectedProtein, label_options)
view.show()