# Make your own PePrMInt CSV!

This notebook allows you to generate custom CSV files for PePrMInt analysis.

**Note**: This notebook is optimized for both Jupyter and Voila environments.

In [1]:
import glob
import os
import tempfile
from pathlib import Path
import sys

from urllib import request
import gzip
from io import BytesIO

from Bio import PDB
from Bio.PDB import PDBParser
from Bio.PDB import PDBIO
from Bio.PDB.PDBExceptions import PDBConstructionWarning
from pathlib import Path
import warnings
import numpy as np
import glob
import vg

import sys
import os
from tqdm import tqdm
from IPython.display import clear_output, display
from ipyfilechooser import FileChooser
import nglview as nv

# Add this import with the other imports in the first cell
import subprocess

# Voila compatibility check: the notebook is considered to run under Voila when
# the VOILA_APP_IP environment variable is present. A membership test on
# os.environ cannot raise, so no try/except is needed.
VOILA_MODE = 'VOILA_APP_IP' in os.environ

# Create the temporary directory only once, so re-running this cell in the same
# kernel keeps using the directory created by the first run.
if 'TMPDIR' not in locals():
    TMPDIR = tempfile.mkdtemp()

# Initialize global variables with safe defaults.
# They are overwritten from the widgets by update_values_and_check().
STRUCTURE_FOLDER = ""
OUTPUT_FOLDER = ""
PYMOL_ALIGNMENT_METHOD = "tmalign"
ALIGNMENT_ONLYCA = True
EXCLUDE_LOOPS = True
SUPERPOSE = False
REFERENCE_PDB = ""
AA_REF1 = 0
AA_REF2 = 0
AA_REF3 = 0
OUTPUTCSV = ""
DOMAINNAME = "DEV"

print(f"Running in {'Voila' if VOILA_MODE else 'Jupyter'} mode")



Running in Jupyter mode


  import pkg_resources


In [None]:
#Functions

def create_dirs():
    """Create the folder named by the global ``OUTPUT_FOLDER`` when it does not exist yet."""
    if os.path.exists(OUTPUT_FOLDER):
        return
    os.makedirs(OUTPUT_FOLDER)

def align_with_tmalign(pdblist, pdbDict):
    """
    Superpose every structure onto the reference with TMalign, or copy them as-is.

    Reads the module-level settings ``SUPERPOSE``, ``STRUCTURE_FOLDER``,
    ``OUTPUT_FOLDER`` and ``REFERENCE_PDB``.

    Args:
        pdblist: List of PDB paths. Not used here; kept so existing callers keep working.
        pdbDict (dict): Maps a structure name (file stem) to the path of its PDB file.

    Returns:
        bool: False when superposition is requested but the reference PDB file is
        missing, True otherwise. A failure on an individual structure is printed
        and that structure is skipped.
    """
    import shutil

    def _copy_unless_same(src, dst):
        # shutil.copy raises SameFileError when both paths designate the same file,
        # which happens when the output folder is the structure folder.
        if os.path.abspath(src) != os.path.abspath(dst):
            shutil.copy(src, dst)

    if not SUPERPOSE:
        # No alignment needed, copy files to output folder or use existing structure folder
        print("Skipping alignment - using structures as-is")
        if OUTPUT_FOLDER != STRUCTURE_FOLDER:
            print(f"Copying structures from {STRUCTURE_FOLDER} to {OUTPUT_FOLDER}")
            os.makedirs(OUTPUT_FOLDER, exist_ok=True)
            for pdb_name, pdb_path in pdbDict.items():
                _copy_unless_same(pdb_path, f"{OUTPUT_FOLDER}/{pdb_name}.pdb")
        return True

    print("Aligning all proteins with TMalign")

    # Get reference PDB path
    ref_pdb_path = f"{STRUCTURE_FOLDER}/{REFERENCE_PDB}.pdb"
    if not os.path.exists(ref_pdb_path):
        print(f"Error: Reference PDB {ref_pdb_path} not found")
        return False

    # Create output directory if it doesn't exist
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    # The reference is only copied; every other structure is queued for TMalign
    jobs = []
    for pdb_name, pdb_path in pdbDict.items():
        out_name = f"{OUTPUT_FOLDER}/{pdb_name}.pdb"
        if pdb_name == REFERENCE_PDB:
            _copy_unless_same(pdb_path, out_name)
            continue
        jobs.append((pdb_path, out_name))

    # NOTE(review): the side-file names below are the ones the original shell
    # command expected from TMalign's "-o" option; confirm against the installed build.
    side_suffixes = ("", "_all", "_atm", "_all_atm", "_all_atm_lig")

    print(f"Processing {len(jobs)} structures...")
    for original_file, aligned_file in tqdm(jobs):
        tmp_name = f"{original_file}_tmp"
        superposed_file = f"{tmp_name}_all_atm"
        try:
            # Argument list without a shell: paths containing spaces or shell
            # metacharacters are passed to TMalign verbatim.
            result = subprocess.run(
                ["TMalign", original_file, ref_pdb_path, "-o", tmp_name],
                capture_output=True,
                text=True,
            )

            if result.returncode != 0:
                print(f"Warning: TMalign failed for {original_file}")
                print(f"Error: {result.stderr}")
                continue

            if not os.path.exists(superposed_file):
                print(f"Warning: TMalign output file not found for {original_file}")
                continue

            shutil.move(superposed_file, aligned_file)
            # Fix the TMalign output (keep only chain A with original chain ID)
            _fix_tmalign_pdb(original_file, aligned_file)

        except Exception as e:
            print(f"Error processing {original_file}: {e}")
        finally:
            # Best-effort removal of TMalign side files (the former "rm -rf" calls)
            for suffix in side_suffixes:
                leftover = tmp_name + suffix
                try:
                    if os.path.isfile(leftover):
                        os.remove(leftover)
                except OSError:
                    pass

    print("TMalign alignment completed")
    return True

def _fix_tmalign_pdb(original_file, tmaligned_file):
    """
    TMalign repeats the reference protein as chain "B" in the output file,
    so we update the aligned file with the (superimposed) target from chain
    "A" only (keeping the original chain name).

    Args:
        original_file: Path of the PDB file that was given to TMalign. The chain
            ID of its first ATOM record is reused; "A" is used when the file
            cannot be read, has no ATOM record, or that record has a blank chain ID.
        tmaligned_file: Path of the TMalign output. It is rewritten in place and
            afterwards contains only the ATOM records that were in chain "A".

    Returns:
        None. Read/write failures on ``tmaligned_file`` are reported with a
        printed message and the file is left as it was.
    """
    # Only I/O and decoding problems are handled; KeyboardInterrupt and
    # programming errors are no longer swallowed.
    recoverable = (OSError, UnicodeDecodeError)

    # Extract the original CHAIN ID from the first atom line
    chainID = "A"
    try:
        with open(original_file, 'r') as f:
            first_atom = next((line for line in f if line.startswith("ATOM")), None)
        if first_atom is not None and len(first_atom) > 21 and first_atom[21] != " ":
            chainID = first_atom[21]
    except recoverable:
        chainID = "A"  # deliberate fallback

    # Keep only atom lines that belong to chain "A" from TMalign output,
    # replacing chain A with the original chain ID
    try:
        with open(tmaligned_file, 'r') as f:
            newLines = [
                line[0:21] + chainID + line[22:]
                for line in f
                if line.startswith("ATOM") and len(line) > 21 and line[21] == "A"
            ]
    except recoverable:
        print(f"Error reading TMalign output: {tmaligned_file}")
        return

    # Write the corrected lines back to the file
    try:
        with open(tmaligned_file, 'w') as f:
            f.writelines(newLines)
    except OSError:
        print(f"Error writing corrected file: {tmaligned_file}")

def get_structure(path:str):
    """
    Read a PDB file with the Biopython PDB parser.

    Args:
        path: String. Path to the PDB file; the file stem is used as structure id.

    Returns:
        The structure object produced by ``PDBParser.get_structure``.

    Raises:
        Exception: any error raised while parsing is re-raised after a
            "Reading Error" message has been printed. Previously the function
            fell through to ``return structure`` with the name unbound, which
            hid the real cause behind an ``UnboundLocalError``.
    """
    parser = PDB.PDBParser()
    pdbCode = Path(path).stem #Get PDB code
    try:
        with warnings.catch_warnings():
            # Construction warnings are silenced for the duration of the parse
            warnings.simplefilter('ignore', PDBConstructionWarning)
            return parser.get_structure(id=pdbCode,
                                        file=path)
    except Exception:
        print(f"Reading Error - {path}")
        raise


def write_structure(output_folder:str, structure:PDB.Structure.Structure):
    """
    Save a Biopython structure as ``<output_folder>/<structure.id>.pdb``.

    Args:
        output_folder: Folder that receives the PDB file.
        structure (PDB.Structure.Structure): Biopython structure; its ``id``
            becomes the file name.

    Returns:
        None
    """
    destination = "{}/{}.pdb".format(output_folder, structure.id)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(destination)


def get_translation_vector(structure:PDB.Structure.Structure, res1:int, res2:int, res3:int):
    """
    Compute the vector that moves the centroid of three CA atoms onto the origin (0,0,0).

    The residues are looked up in the first chain of the first model.

    Args:
        structure (PDB.Structure.Structure): Biopython PDB Structure
        res1 (int): Residue number forming triangle vertex 1
        res2 (int): Residue number forming triangle vertex 2
        res3 (int): Residue number forming triangle vertex 3

    Returns:
        translation (list): the three components of the negated centroid

    """
    first_model = structure[0]
    first_chain = first_model[first_model.child_list[0].id]
    vertices = np.array([first_chain[number]['CA'].get_coord()
                         for number in (res1, res2, res3)])
    centroid = np.mean(vertices, axis=0)
    return list(-centroid)


def get_rotation_matrix(structure:PDB.Structure.Structure, res1:int, res2:int, res3:int, orientation='z'):
    """
    Get the rotation matrix that maps the normal of the triangle formed by the
    CA atoms of 3 residues onto the negative direction of the chosen axis.

    Up to four candidate rotations are built (normal / -normal, matrix /
    transposed matrix). The first three are applied to a copy of the structure
    and checked with ``test_rotation``; the first one that passes is returned.
    When none of the first three passes, the fourth is returned without being checked.

    Args:
        structure (PDB.Structure.Structure): Biopython PDB Structure
        res1 (int): Residue number forming Triangle vertex 1
        res2 (int): Residue number forming Triangle vertex 2
        res3 (int): Residue number forming Triangle vertex 3
        orientation (str): Axis used for alignment, one of 'x', 'y', 'z' (default = 'z')

    Returns:
        rotation: the value returned by ``PDB.vectors.rotmat`` (possibly transposed).
            # presumably a 3x3 numpy array - TODO confirm against Biopython

    """

    def get_normal_COM(structure, res1, res2, res3):
        """
        Compute the triangle normal and the geometric centre of a structure.

        Args:
            structure: Biopython structure; residues are read from the first
                chain of the first model.
            res1: Residue number of triangle vertex 1
            res2: Residue number of triangle vertex 2
            res3: Residue number of triangle vertex 3

        Returns:
            (N, COM): N is the cross product (p2 - p1) x (p3 - p1) of the CA
            coordinates; COM is the unweighted mean of all atom coordinates.

        """
        # CA coordinates of the three residues, taken from the first chain
        chain = structure[0].child_list[0].id
        p1 = structure[0][chain][res1]['CA'].get_coord()
        p2 = structure[0][chain][res2]['CA'].get_coord()
        p3 = structure[0][chain][res3]['CA'].get_coord()

        # Two edge vectors of the triangle, both starting at vertex 1
        A = p2 - p1
        B = p3 - p1
        # The triangle normal is the cross product of the two edges (not normalised)
        N = np.cross(A, B)

        # Mean of all atom coordinates (no mass weighting despite the name COM)
        coords = np.array([x.get_coord() for x in structure.get_atoms()])
        COM = coords.mean(axis=0)

        return N, COM

    def test_rotation(structure, res1, res2, res3):
        """
        Check a rotated structure: True when the angle between the triangle
        normal and [0, 0, -1] is exactly 0 or 180 and the z component of the
        geometric centre is positive.
        """
        # Recompute normal and centre on the (already rotated) structure
        N, COM = get_normal_COM(structure, res1, res2, res3)
        # NOTE(review): the comparison with 180 suggests vg.angle returns degrees - confirm.
        # NOTE(review): exact float equality is used here; a result such as
        # 1e-6 fails the check. The reference is always the z axis, whatever
        # `orientation` was requested.
        angle = vg.angle(N, np.array([0, 0, -1]))
        return (angle == 0 and COM[2] > 0) or (angle == 180 and COM[2] > 0)

    N,COM = get_normal_COM(structure, res1, res2, res3)

    # Candidate target directions: the negative unit vector of each axis
    axis = {'x':[-1,0,0],
            'y':[0,-1,0],
            'z':[0,0,-1]}
    # Reference vector the normal is rotated onto; with the default 'z' it is [0,0,-1]
    refVector = PDB.vectors.Vector(axis[orientation])
    normal = PDB.vectors.Vector(N) #The normal should be a biopython object

    # Candidates are tried on a copy so the input structure is never modified here
    temp = structure.copy()

    # case 1: normal and rotation
    rotation = PDB.vectors.rotmat(normal, refVector)
    temp.transform(rotation, [0, 0, 0])

    # If it doesn't work, case 2: -normal and rotation
    if not test_rotation(temp, res1, res2, res3):
        temp = structure.copy()
        rotation = PDB.vectors.rotmat(-normal, refVector)
        temp.transform(rotation, [0, 0, 0])
        # If it doesn't work, case 3: normal and rotation.T
        if not test_rotation(temp, res1, res2, res3):
            temp = structure.copy()
            rotation = PDB.vectors.rotmat(normal, refVector).T
            temp.transform(rotation, [0, 0, 0])
            # If it doesn't work, case 4: -normal and rotation.T.
            # This last candidate is returned as-is: it is neither applied to
            # the copy nor checked with test_rotation.
            if not test_rotation(temp, res1, res2, res3):
                temp = structure.copy()
                rotation = PDB.vectors.rotmat(-normal, refVector).T

    return rotation



def apply_rotation_matrix(structure:PDB.Structure.Structure, rotation:np.array):
    """
    Rotate the first model of the structure in place, without any translation.

    Args:
        structure (PDB.Structure.Structure): Biopython PDB Structure
        rotation (np.array):  Rotation matrix (3x3)

    Returns:
        structure (PDB.Structure.Structure): the same structure object, rotated
    """
    no_shift = [0, 0, 0]
    first_model = structure[0]
    first_model.transform(rotation, no_shift)
    return structure


def apply_translation_vector(structure:PDB.Structure.Structure, translation:np.array):
    """
    Translate the first model of the structure in place, without any rotation.

    Args:
        structure (PDB.Structure.Structure): Biopython PDB Structure
        translation (np.array):  Translation vector (1x3)

    Returns:
        structure (PDB.Structure.Structure): the same structure object, translated
    """
    # The transform call needs a rotation too: pass the identity so only the shift applies
    no_rotation = np.eye(3).tolist()
    first_model = structure[0]
    first_model.transform(no_rotation, translation)
    return structure



def get_transformation_from_reference(templateCode:str, pdbFolder:str, res1:int, res2:int, res3:int):
    """
    Compute the rotation and translation defined by three residues of the reference structure.

    The reference ``<pdbFolder>/<templateCode>.pdb`` is read, translated so the
    centroid of the three CA atoms sits on the origin, and the rotation is then
    computed on that translated structure. The reference itself is not rotated
    here: the caller applies both transformations to every structure afterwards.

    Args:
        templateCode (str): File stem of the reference PDB
        pdbFolder (str): Folder containing the reference PDB
        res1 (int): Residue number forming triangle vertex 1
        res2 (int): Residue number forming triangle vertex 2
        res3 (int): Residue number forming triangle vertex 3

    Returns:
        tuple: (rotation, translation)
    """
    residues = (res1, res2, res3)
    reference = get_structure(f"{pdbFolder}/{templateCode}.pdb")

    translation = get_translation_vector(reference, *residues)
    centred = apply_translation_vector(reference, translation)
    rotation = get_rotation_matrix(centred, *residues)

    return (rotation, translation)


def create_output_dir(directory_path:str, domain:str, outputFolderName):
    """
    Create ``<directory_path>/<domain>/<outputFolderName>`` if it does not exist yet.

    Args:
        directory_path (str): Base folder
        domain (str): Domain name (first sub-folder)
        outputFolderName: Name of the output folder created inside the domain folder

    Returns:
        None
    """
    target = "{}/{}/{}".format(directory_path, domain, outputFolderName)
    if os.path.exists(target):
        return
    os.makedirs(target)


def transform_pdbs(rotation:np.array, translation:np.array,outputFolder:str):
    """
    Translate then rotate every ``*.pdb`` file of a folder, overwriting each file in place.

    Args:
        rotation (np.array): Rotation matrix (3x3)
        translation (np.array): Translation vector (1x3)
        outputFolder (str): Folder that is both read from and written to

    Returns:
        None
    """
    for pdb_path in tqdm(glob.glob(f"{outputFolder}/*.pdb")):
        shifted = apply_translation_vector(get_structure(pdb_path), translation)
        oriented = apply_rotation_matrix(shifted, rotation)
        write_structure(outputFolder, oriented)

def download(url): # from https://stackoverflow.com/a/57082711
    """
    Download a gzip-compressed text file and store it, decompressed, in ``TMPDIR``.

    Args:
        url (str): Address of the ``.gz`` file. The local file name is the last
            path component of the URL without its final three characters
            (the ``.gz`` suffix).

    Returns:
        str: Path of the decompressed file inside ``TMPDIR``.

    Raises:
        Errors from ``urllib`` (network), ``gzip`` (invalid archive) or the
        UTF-8 decoding are not caught here.
    """
    out_file_path = url.split("/")[-1][:-3]
    destination = TMPDIR+'/'+out_file_path
    print('Downloading pdb_chain_uniprot.csv Database from: {}'.format(url))

    # Context manager: the connection is closed even when reading fails
    with request.urlopen(url) as response:
        compressed_payload = response.read()

    # Explicit encoding so the written file does not depend on the platform default
    with open(destination, 'w', encoding='utf-8') as outfile:
        outfile.write(gzip.decompress(compressed_payload).decode('utf-8'))

    return destination
def lets_go(button):
    """
    Run the whole CSV generation; click handler of the "Generate CSV" button.

    Reads the module-level settings (``STRUCTURE_FOLDER``, ``OUTPUT_FOLDER``,
    ``REFERENCE_PDB``, ``AA_REF1..3``, ``SUPERPOSE``, ``DOMAINNAME``,
    ``OUTPUTCSV``) and prints all progress inside the ``generalOut`` widget.

    Steps: alignment/copy of the structures, dataset construction with the
    pepr2ds builder, optional re-orientation on the three reference residues,
    IBS tagging, and export of the CB atoms (CA for glycine) to ``OUTPUTCSV``.

    Args:
        button: The clicked button, passed by ipywidgets; not used.

    Returns:
        None. Stops early, after printing a message, when the reference PDB is
        not in the structure folder or when the alignment step fails.
    """
    #button is just the button argument, not needed here.
    with generalOut:
        create_dirs()
        #1. List all pdbs
        pdblist = glob.glob(f"{STRUCTURE_FOLDER}/*.pdb")
        pdbDict = {Path(pdbpath).stem: pdbpath for pdbpath in pdblist}
        if REFERENCE_PDB not in pdbDict:
            print("Reference PDB not in folder. Please check and try again")
            # Leave the callback normally (same as the alignment failure below);
            # sys.exit() would raise SystemExit inside the kernel's event handler.
            return

        print("1. Processing alignment")
        success = align_with_tmalign(pdblist, pdbDict)
        if not success:
            print("Alignment processing failed, stopping execution")
            return

        import importlib
        import pepr2ds.builder.Builder as builderEngine
        importlib.reload(builderEngine)

        #This setup dictionnary is just to avoid bugs with the full PePr2Ds pipeline.
        SETUP = {}
        SETUP["DOMAIN_PROSITE"] = None
        SETUP["PROSITE_DOMAIN"] = None
        SETUP["DOMAIN_CATH"] = None
        SETUP["CATH_DOMAIN"] = None
        SETUP["SUPERFAMILY"] = None

        builder = builderEngine.Builder(SETUP, recalculate = True, update=False, notebook = True, core=1)

        # Ignore cleaning.... For now...
        # builder.structure.clean_all_pdbs()


        builder.structure.CATHFOLDER = OUTPUT_FOLDER
        builder.structure.ALFAFOLDFOLDER = ''
        mappingFile = download("ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/csv/pdb_chain_uniprot.csv.gz")

        print("2. Processing PDB files")
        DATASET = builder.structure.process_pdb_list(pdblist = pdblist, datatype='custom',domname=DOMAINNAME, mappingFile=mappingFile)
        print("3. Computing protrusions")
        DATASET = builder.structure.add_protrusions(DATASET)

        print("4. Adding Origin of datas")
        DATASET = builder.sequence.add_uniprotId_Origin(DATASET)

        print("5. Adding Uniprot informations")
        builder.sequence.UNIPROTFOLDER = TMPDIR+'/'
        builder.sequence.download_uniprot_data(DATASET)
        DATASET = builder.sequence.add_info_from_uniprot(DATASET)

        print("6. Adding cluster informations")
        DATASET = builder.sequence.add_cluster_info(DATASET)
        DATASET = DATASET.drop_duplicates(subset=['atom_number','atom_name','residue_name','residue_number','cathpdb','chain_id'])


        # NOTE(review): the re-orientation on the three reference residues only
        # runs when TMalign superposition was NOT requested, and it rewrites the
        # files of OUTPUT_FOLDER in place - confirm both are intended.
        if not SUPERPOSE:
            print("7. Realigning all structures on the reference structure.")
            pdbFolder = OUTPUT_FOLDER
            rotation, translation = get_transformation_from_reference(REFERENCE_PDB, pdbFolder,
                                                                      AA_REF1, AA_REF2, AA_REF3)

            print(f"rotation matrix: {rotation}")
            print(f"translation vector: {translation}")

            transform_pdbs(rotation, translation, pdbFolder)

            print("Done")


        print("8. Computing Interfacial Binding Site (IBS)")
        import pepr2ds
        importlib.reload(pepr2ds)
        from pepr2ds.dataset.tagibs import Dataset
        CUSTOMDATA = Dataset(DATASET,"")
        CUSTOMDATA.tag_ibs(DATASET, 
                    domain = DOMAINNAME, #Domain
                    pdbreference = REFERENCE_PDB, #PDB Template
                    Uniref=None, #Sequence redundancy filter
                    addSequence=False, #add the non structural data in the IBS/NONIBS dataset.
                    extendAlign=False, #Extend the secondary structure instead of a raw "cut" based on the alignment position
                    overide_axis_mode = True, #use the Zaxis instead of the alignment to tag the IBS
                    zaxis=0, #Z axis plane to define "IBS" or not IBS
                    extendCoilOnly = False, #Extend coil only.
                    coordinates_folder_name = OUTPUT_FOLDER, #Where are the PDBs
                    data_type = 'custom', #Type of data asked ('cath' or 'alfafold' or 'cath+af')
                    base_folder= '',  #'Type of alignment folder'
                    silent=True,
                )

        print("  >Done<")

        # Keep one atom per residue: CB, or CA for glycine
        df = CUSTOMDATA.domainDf[
            (CUSTOMDATA.domainDf['atom_name'] == 'CB') |
            ((CUSTOMDATA.domainDf['residue_name'] == 'GLY') & (CUSTOMDATA.domainDf['atom_name'] == 'CA'))
        ] 
        df.to_csv(OUTPUTCSV, sep=",")

        print(f"> Results saved in {OUTPUTCSV}")
        # CUSTOMDATA.export_dataset_PePrMInt(peprmint_web_dataset_folder=OUTPUT_FOLDER, PePr2Ds_folder=None, custom_mode=True)
        


def visualise_structure(structure = None, structureFile="/home/thibault/projects/peprmint/datafolder/databases/custom/domains/START/raw/7U9D.pdb"):
    """
    Display a structure with nglview together with markers for the z = 0 plane.

    Args:
        structure: Biopython structure to show. When None, ``structureFile`` is parsed instead.
        structureFile (str): PDB file used only when ``structure`` is None.
            NOTE(review): the default is a developer-specific absolute path and
            only exists on that machine; pass ``structure`` or an explicit path.

    Returns:
        None. The nglview widget is sent to the current output with ``display``.
    """
    if structure is None:
        structure = PDB.PDBParser().get_structure(id='struct', file=structureFile)

    displayMol = nv.show_biopython(structure)
    shape = displayMol.shape

    # Red sphere on the origin, green spheres on four points of the z = 0 plane
    shape.add_sphere([0, 0, 0], [1, 0, 0], 1)
    shape.add_sphere([10, 10, 0], [0, 1, 0], 0.5)
    shape.add_sphere([-10, -10, 0], [0, 1, 0], 0.5)
    shape.add_sphere([10, -10, 0], [0, 1, 0], 0.5)
    shape.add_sphere([-10, 10, 0], [0, 1, 0], 0.5)

    # Red arrow from the origin along -z
    shape.add_arrow([0, 0, 0], [0, 0, -10], [1, 0, 0], 1.0)

    # Two triangles (6 vertices, flat x,y,z list) covering the z = 0 plane
    mesh = [20, 20, 0,
            20, -20, 0,
            -20, -20, 0,
            -20, -20, 0,
            20, 20, 0,
            -20, 20, 0]

    # One green RGB triple per vertex, flattened: same length as `mesh`
    color = [0, 1, 0] * (len(mesh) // 3)
    shape.add_mesh(mesh, color)

    # NOTE(review): component 7 is taken to be the mesh (structure, 5 spheres,
    # arrow, mesh) - confirm the numbering if shapes are added or removed above.
    displayMol.update_representation(component=7, repr_index=0, opacity=0.5, side="double")

    display(displayMol)
    # displayMol.layout.width = '600px'
    # displayMol.layout.height = '600px'



In [None]:
## Preparing all widgets

import ipywidgets as widgets

style = {'description_width': 'initial'}

# Safe current directory handling for Voila: os.getcwd() raises an OSError
# (FileNotFoundError) when the working directory no longer exists.
try:
    current_dir = os.getcwd()
    if not os.path.exists(current_dir):
        current_dir = os.path.expanduser("~")
except OSError:
    current_dir = os.path.expanduser("~")

# Default path for development: reuse the directory validated above instead of
# calling os.getcwd() a second time outside the guarded block.
default_path = current_dir

def _make_folder_chooser(title, fallback_description, failure_message):
    """Return a directory-only FileChooser, or a Text box when the FileChooser cannot be built."""
    try:
        chooser = FileChooser(default_path)
        chooser.title = title
        chooser.show_only_dirs = True
    except Exception as chooser_error:
        print(f"{failure_message}: {chooser_error}")
        # Fallback to text widget if FileChooser fails
        chooser = widgets.Text(
            value=default_path,
            description=fallback_description,
            style=style
        )
    return chooser


# Folder containing the input structures
structureFolder_widget = _make_folder_chooser(
    "Structure folder",
    'Structure folder:',
    "Warning: FileChooser initialization failed",
)

# Create a warning text for output folder
outputFolder_warning = widgets.HTML(
    value="<b>⚠️ Note:</b> The OUTPUT folder needs to be created beforehand. This widget cannot create new folders",
    style={'description_width': 'initial'}
)

# Folder receiving the processed structures
outputFolder_widget = _make_folder_chooser(
    "Output folder",
    'Output folder:',
    "Warning: Output FileChooser initialization failed",
)

def _make_checkbox(description, value):
    """Build one of the alignment checkboxes; they differ only by label and initial state."""
    return widgets.Checkbox(
        value=value,
        description=description,
        disabled=False,
        indent=False,
        style=style,
    )


# Alignment widgets
prealigned_widgets = _make_checkbox('Align structures with TMalign (uncheck if already aligned)', True)

alignment_widget = widgets.Dropdown(
    description='Alignment method',
    options=['tmalign'],
    value='tmalign',
    disabled=False,
    style=style,
)

onlyCA_widget = _make_checkbox('Only CA?', True)
onlySS_widget = _make_checkbox('Structured element ?', True)
showPymolOutput_widget = _make_checkbox('Show Output ?', False)

# Output area shown in the "Outputs" tab of the alignment accordion
outputPymol = widgets.Output()

def _make_residue_input(position):
    """Build the integer input (0 to 10000, initially 0) for reference amino acid number `position`."""
    return widgets.BoundedIntText(
        value=0,
        min=0,
        max=10000,
        step=1,
        description=f'Reference amino acid {position}:',
        disabled=False,
        style=style,
    )


# Reference PDBs
referencePDB_widget = widgets.Text(
    description='Reference PDB:',
    placeholder='Reference PDB name',
    value='',
    disabled=False,
    style=style,
)

# The three residues defining the plane used for the orientation
reference_aa1__widget = _make_residue_input(1)
reference_aa2__widget = _make_residue_input(2)
reference_aa3__widget = _make_residue_input(3)

# The preview button starts disabled; update_values_and_check enables it
previs_button = widgets.Button(description="Pre_visualisation", disabled=True)
outPrevis = widgets.Output(layout=widgets.Layout(height='300px', overflow_y='auto'))

# Other widgets
domain_widget = widgets.Text(
    description='Domain Name:',
    placeholder='Domain Name',
    value='',
    disabled=False,
    style=style,
)

# Output CSV chooser: files are selectable here (no directory-only restriction)
try:
    OutputCSV_widget = FileChooser(default_path)
    OutputCSV_widget.title = "Output CSV"
except Exception as chooser_error:
    print(f"Warning: CSV FileChooser initialization failed: {chooser_error}")
    OutputCSV_widget = widgets.Text(
        description='Output CSV:',
        value=default_path,
        style=style
    )

# Output area that receives every message of the generation run
generalOut = widgets.Output()

# The run button starts disabled; update_values_and_check enables it
run_button_layout = widgets.Layout(width='50%', height='80px', border="2px solid black")
runButton_widget = widgets.Button(
    description="Generate CSV for PePr2DS",
    tooltip="Generate CSV file. Button will be activated when all parameters will be filed.",
    layout=run_button_layout,
    style=style,
    disabled=True,
)

print("Widgets initialized successfully")

Widgets initialized successfully


In [None]:
def previsualise(b):
    """
    Click handler of the preview button: orient the reference structure and show it.

    The reference PDB name, the three residue numbers and the structure folder
    are read from the widgets. Everything is printed or displayed inside
    ``outPrevis``; any error is reported there instead of being raised.

    Args:
        b: The clicked button, passed by ipywidgets; not used.
    """
    outPrevis.clear_output()
    with outPrevis:
        try:
            pdbFileCode = referencePDB_widget.value
            residues = tuple(
                residue_widget.value
                for residue_widget in (reference_aa1__widget, reference_aa2__widget, reference_aa3__widget)
            )

            # Handle both FileChooser and Text widget values
            folder_attribute = 'value' if hasattr(structureFolder_widget, 'value') else 'selected'
            pdbFolder = getattr(structureFolder_widget, folder_attribute)

            rotation, translation = get_transformation_from_reference(pdbFileCode, pdbFolder, *residues)

            # Re-read the reference and apply translation, then rotation
            reference = get_structure(os.path.join(pdbFolder, f"{pdbFileCode}.pdb"))
            reference = apply_translation_vector(reference, translation)
            reference = apply_rotation_matrix(reference, rotation)

            if VOILA_MODE:
                print("Visualization not available in Voila mode")
                print(f"Rotation matrix calculated: {rotation}")
                print(f"Translation vector: {translation}")
                return
            visualise_structure(reference)
        except Exception as e:
            print(f"Error in previsualisation: {e}")

def get_widget_value(widget):
    """
    Read the current value of a widget, whatever its type.

    The ``selected`` attribute is preferred when the widget has one, then
    ``value``; an empty string is returned when it has neither.
    """
    for attribute in ("selected", "value"):
        if hasattr(widget, attribute):
            return getattr(widget, attribute)
    return ""

def update_values_and_check(change=None):
    """
    Copy every widget value into the module-level settings and refresh the buttons.

    - The preview button is enabled only while the structure folder, output
      folder, reference PDB name and the three residue numbers are all set
      (non-zero) and the reference PDB file exists; it is disabled again as
      soon as one of these stops being true.
    - The run button is enabled while the structure folder, output folder,
      reference PDB name, output CSV and domain name are all set.

    Args:
        change: Value passed by the widget callbacks (or None for a manual call); not used.

    Returns:
        None. Any error is printed inside ``generalOut`` instead of being raised.
    """
    try:
        global STRUCTURE_FOLDER, OUTPUT_FOLDER, PYMOL_ALIGNMENT_METHOD
        global ALIGNMENT_ONLYCA, EXCLUDE_LOOPS, SUPERPOSE
        global REFERENCE_PDB, AA_REF1, AA_REF2, AA_REF3
        global OUTPUTCSV, DOMAINNAME

        STRUCTURE_FOLDER = get_widget_value(structureFolder_widget)
        OUTPUT_FOLDER = get_widget_value(outputFolder_widget)
        PYMOL_ALIGNMENT_METHOD = alignment_widget.value
        ALIGNMENT_ONLYCA = onlyCA_widget.value
        EXCLUDE_LOOPS = onlySS_widget.value
        SUPERPOSE = prealigned_widgets.value
        REFERENCE_PDB = referencePDB_widget.value
        AA_REF1 = reference_aa1__widget.value
        AA_REF2 = reference_aa2__widget.value
        AA_REF3 = reference_aa3__widget.value
        OUTPUTCSV = get_widget_value(OutputCSV_widget)
        DOMAINNAME = domain_widget.value

        # Preview button: recomputed on every call so it is also switched back
        # off when an input is cleared or the reference file no longer matches.
        preview_ready = bool(STRUCTURE_FOLDER and OUTPUT_FOLDER and REFERENCE_PDB
                             and AA_REF1 and AA_REF2 and AA_REF3)
        if preview_ready:
            pdb = os.path.join(STRUCTURE_FOLDER, f"{REFERENCE_PDB}.pdb")
            preview_ready = os.path.exists(pdb)
        previs_button.disabled = not preview_ready

        # Run button: every mandatory setting must be non-empty
        greenlight = all([STRUCTURE_FOLDER, OUTPUT_FOLDER, REFERENCE_PDB, OUTPUTCSV, DOMAINNAME])

        if greenlight:
            with generalOut:
                print("Everything seems fine, ready to generate the CSV!")
            runButton_widget.disabled = False
        else:
            runButton_widget.disabled = True

    except Exception as e:
        with generalOut:
            print(f"Error in update_values_and_check: {e}")

# ...existing layout code...

# Widget event handling with error protection
try:
    previs_button.on_click(previsualise)

    # File choosers: FileChooser offers register_callback, while the Text
    # fallback is observed like any other widget.
    for chooser in (structureFolder_widget, outputFolder_widget, OutputCSV_widget):
        if hasattr(chooser, 'register_callback'):
            chooser.register_callback(update_values_and_check)
        else:
            chooser.observe(update_values_and_check, names='value')

    # Every widget read by update_values_and_check must trigger it. The
    # alignment widgets are included so that toggling "Align structures with
    # TMalign" updates SUPERPOSE immediately instead of waiting for another
    # widget to change.
    for observed_widget in (
        prealigned_widgets,
        alignment_widget,
        onlyCA_widget,
        onlySS_widget,
        referencePDB_widget,
        reference_aa1__widget,
        reference_aa2__widget,
        reference_aa3__widget,
        domain_widget,
    ):
        observed_widget.observe(update_values_and_check, names='value')

    runButton_widget.on_click(lets_go)

except Exception as e:
    print(f"Warning: Some event handlers failed to register: {e}")

# Initial update
update_values_and_check()

Event handlers registered successfully


## Setting up folders

- `structure folder` is where all your structures are (in PDB format). **NOTE: To ensure that the right information is retrieved from the other databases, make sure that the first 4 characters of each file name ARE the PDB code.**
- `output folder` is where all the superposed structures will be
- `output csv` is the CSV that will be generated.
- `Domain Name` is the name you want to give to your dataset (please no more than 4 letters).

If you have already pre-aligned all structures in the same frame of reference and do not need to align them with TMalign, set `structure folder` to the same location as `output folder`.

**Note**: File browsing widgets may appear as text inputs in Voila mode.

In [5]:
# Shared layout of the folder/parameter accordion (half-width column)
layout_folders = widgets.Layout(
    width='50%',
    border='0px solid',
    align_items='stretch',
    flex_flow='column',
    display='flex',
)

# Create folder input widgets
folderbox = widgets.VBox(children=[structureFolder_widget, outputFolder_warning, outputFolder_widget])
otherbox = widgets.VBox(children=[domain_widget, OutputCSV_widget])

accordionInputs = widgets.Accordion(children=[folderbox, otherbox], layout=layout_folders)
for section_index, section_title in enumerate(("Input folders", "Output parameters")):
    accordionInputs.set_title(section_index, title=section_title)

# Display with error handling
try:
    display(accordionInputs)
except Exception as display_error:
    print(f"Error displaying accordion: {display_error}")
    # Fallback display: show the two boxes without the accordion
    for fallback_box in (folderbox, otherbox):
        display(fallback_box)

Accordion(children=(VBox(children=(FileChooser(path='/home/thibault/projects/peprmint/tubiana_etal_2022/utils'…

## Parameters for structure alignment

Your structures need to be aligned/superposed for proper analysis. You have two options:

1. **Use pre-aligned structures**: If your structures are already aligned, uncheck the "Align structures with TMalign" option and make sure your `structure folder` and `output folder` point to the same location.

2. **Align with TMalign**: If your structures are not aligned, check the "Align structures with TMalign" option. This will use the TMAlign algorithm to align all structures to your reference structure.

**TMalign parameters**:
- Uses only alpha carbons (CA) for alignment
- Provides structural alignment without requiring sequence similarity
- Automatically handles different chain IDs and structures

In [6]:
# Half-width column layout of the alignment accordion
layoutPymol = widgets.Layout(
    width='50%',
    border='',
    align_items='stretch',
    flex_flow='column',
    display='flex',
)

pymolParametersBox = widgets.VBox(children=[
    prealigned_widgets,
    alignment_widget,
    onlyCA_widget,
    onlySS_widget,
    showPymolOutput_widget,
])

# Scrollable container of the alignment output
pymolOutputBox = widgets.HBox([outputPymol], layout=widgets.Layout(height='300px', overflow_y='auto'))

pymolAlignmentTab = widgets.Tab()
pymolAlignmentTab.children = [pymolParametersBox, pymolOutputBox]
for tab_index, tab_title in enumerate(("Parameters", "Outputs")):
    pymolAlignmentTab.set_title(tab_index, tab_title)

accordionPymol = widgets.Accordion(children=[pymolAlignmentTab], layout=layoutPymol)
accordionPymol.set_title(0, title="TMalign Alignment Parameters")

try:
    display(accordionPymol)
except Exception as display_error:
    print(f"Error displaying pymol accordion: {display_error}")
    # Fallback display: parameters only, without tab and accordion
    display(pymolParametersBox)

Accordion(children=(Tab(children=(VBox(children=(Checkbox(value=True, description='Align structures with TMali…

## Parameters for reference alignment 

In order to assign the Interfacial Binding Site (IBS), all proteins must be oriented and aligned.  
To do so, **one protein** is used as a reference. Every other protein will be aligned on this reference with TMalign.  
Then, **3 amino acids** are required. Those amino acids define a 3D plane. The plane will be aligned on z = 0, and every amino acid below this plane will be considered part of the IBS.  
You can click on "Pre_visualisation" to display the protein with its plane once the 3 amino acids are defined.

In [7]:
# Half-width column layout of the accordion (same values as the alignment accordion)
layoutPymol = widgets.Layout(
    display='flex',
    flex_flow='column',
    align_items='stretch',
    border='',
    width='50%'
)

# Full-width column layout for the content of the accordion.
# '0px solid' (no space) is the valid CSS form, as used for the folder layout.
tab1HBox_layout = widgets.Layout(
    display='flex',
    flex_flow='column',
    align_items='stretch',
    border='0px solid',
    width='100%'
)

# Reference PDB name, the three residue numbers, then the preview button and its output
referencebox = widgets.VBox([
    referencePDB_widget,
    reference_aa1__widget, 
    reference_aa2__widget, 
    reference_aa3__widget, 
    widgets.VBox([previs_button, outPrevis])
], layout=tab1HBox_layout)

accordion_previs = widgets.Accordion([referencebox], layout=layoutPymol)
accordion_previs.set_title(0, title="IBS Selection parameters and previsualisation")

try:
    display(accordion_previs)
except Exception as e:
    print(f"Error displaying reference accordion: {e}")
    # Fallback display: the box without the accordion
    display(referencebox)

Accordion(children=(VBox(children=(Text(value='', description='Reference PDB:', placeholder='Reference PDB nam…

In [8]:
# The run button is displayed exactly once, outside the guarded block, so the
# fallback below can never show a second copy of it.
display(runButton_widget)

try:
    text = "General Output"
    # Every opened tag is closed (the <font> tag was previously left open)
    outputlabel = widgets.HTML(value=f"<u><b><font color='black'>{text}</font></b></u>")
    output_container = widgets.HBox(
        children=[outputlabel, generalOut], 
        layout=widgets.Layout(
            height='300px', 
            overflow_y='auto', 
            border='1px solid black', 
            width="50%"
        )
    )
    display(output_container)
except Exception as e:
    print(f"Error displaying output widgets: {e}")
    # Fallback display: the bare output area, without label and frame
    display(generalOut)

# Initial status message
with generalOut:
    print("Interface initialized. Please configure your settings above.")

Button(description='Generate CSV for PePr2DS', disabled=True, layout=Layout(border_bottom='2px solid black', b…

HBox(children=(HTML(value="<u><b><font color='black'>General Output</b></u>"), Output()), layout=Layout(border…