<img src="https://raw.githubusercontent.com/engelberger/FrustraEvo/master/dalle3.png" height="200" align="right" style="height:240px">

# FrustraEvo Google Colab Notebook

This notebook is designed to help you understand and use FrustraEvo, a tool that allows you to study energetic patterns within and between protein families. FrustraEvo calculates the frustration logo and the frustration contact maps, using a set of aligned sequences (MSA) and their corresponding structures as input files.

## Usage

To use FrustraEvo, please follow the instructions in this notebook.

## Feedback/Issues

Please report any issues to mariaines.freiberger@gmail.com

In [None]:
#@title Setup Docker and Pull Image

import os
import hashlib

def add_hash(x,y):
  """Return `x` followed by "_" and the first five hex digits of SHA-1(`y`).

  `y` is encoded with str.encode() before hashing, so it must be a string.
  """
  short_digest = hashlib.sha1(y.encode()).hexdigest()[:5]
  return "_".join((x, short_digest))

def check_dir_exists(dir_path):
  """Report whether `dir_path` exists.

  Delegates to os.path.exists, so any existing path (a regular file as well
  as a directory) yields True.
  """
  path_is_present = os.path.exists(dir_path)
  return path_is_present

# Install udocker
print("Installing udocker...")
!pip install -q udocker

# Allow root access for udocker
print("Allowing root access for udocker...")
!udocker --allow-root install

# Pull the FrustraEvo image
print("Pulling FrustraEvo image...")
!udocker --allow-root pull proteinphysiologylab/frustraevo > /dev/null


In [None]:
#@title Run FrustraEvo
from google.colab import files
import os
import subprocess
from pathlib import Path
from typing import Dict, Optional
import urllib.request
import zipfile
import shutil

#@markdown Create a folder where you will put the PDB files and the Multiple Sequence Alignment file (MSA) in .fasta format.

# Working folder name; main() builds every job directory under <cwd>/FOLDER_NAME.
FOLDER_NAME = 'FrustraEvo'
# Name of the per-job subfolder used by create_job_directories() for the inputs.
INPUT_FILES_DIR = 'input_files'
# Base URL the example alignment and pdbs.zip are downloaded from.
FRUSTRAEVO_BASE_URL = 'https://frustraevo.qb.fcen.uba.ar/static'

#@markdown Enter the parameters for the bash file:
job_id = 'example' #@param {type:"string"}
run_example = True #@param {type:"boolean"}
#@markdown If run example is True the following variables will be omitted!
fasta_file = 'upload_fasta'  #@param ["upload_fasta"]
pdb_source = 'upload_zip' #@param ["upload_zip", "predict_with_esm"]
protein_ref = '3a0g-A' #@param {type:"string"}
contact_maps = 'yes' #@param ["yes", "no"]

def create_job_directories(base_path: Path, job_id: str) -> tuple[Path, Path]:
    """Make sure a job's input and output folders exist and return both paths.

    Both folders are placed under ``base_path / job_id``. Missing parent
    folders are created and folders that already exist are left as they are.

    Returns:
        ``(input_dir, output_dir)`` as a tuple of paths.
    """
    job_root = base_path / job_id
    job_dirs = (job_root / INPUT_FILES_DIR, job_root / "output_files")
    for directory in job_dirs:
        directory.mkdir(parents=True, exist_ok=True)
    return job_dirs

def handle_file_upload(input_dir: Path, file_description: str, file_extension: str) -> str:
    """Prompt for an upload, save every uploaded file, and report the match.

    Args:
        input_dir: Existing folder the uploaded files are written into.
        file_description: Human-readable name shown in the upload prompt.
        file_extension: Suffix (e.g. ``'.fasta'``) the wanted file must end with.

    Returns:
        The name of the first uploaded file ending with ``file_extension``,
        or ``""`` when none of the uploaded files matches.
    """
    print(f"Please upload your {file_description}")
    uploaded = files.upload()

    matched_name = ""
    for filename, content in uploaded.items():
        # Save every uploaded file. Returning as soon as a match is found
        # would leave the remaining uploads unwritten.
        (input_dir / filename).write_bytes(content)
        if not matched_name and filename.endswith(file_extension):
            matched_name = filename

    return matched_name

def unzip_file(zip_path: Path, extract_path: Path) -> None:
    """Extract all members of the archive at ``zip_path`` into ``extract_path``."""
    with zipfile.ZipFile(zip_path, mode='r') as archive:
        archive.extractall(path=extract_path)

def download_file(url: str, output_path: Path) -> None:
    """Fetch ``url`` with urllib and store the retrieved content at ``output_path``."""
    urllib.request.urlretrieve(url, filename=output_path)

def create_bash_script(job_dir: Path) -> None:
    """Write an executable ``run.sh`` launcher into ``job_dir``.

    The generated script takes five positional arguments. ``$1`` is mounted
    at ``/pdb/`` inside the udocker container and ``$2``-``$5`` are forwarded
    to the container's ``/run.sh``. The container's stdout goes to
    ``log.txt``. Afterwards the script changes into ``$1``, re-owns it, and
    moves ``FrustraEvo_$2`` to ``../output_files``.
    """
    script_lines = (
        '#! /bin/bash',
        'udocker --allow-root run -v $1:/pdb/ --rm proteinphysiologylab/frustraevo:latest /bin/bash -c "cd / && sh /run.sh $2 $3 $4 $5" > log.txt',
        'cd $1',
        'chown -R $(whoami) $1',
        'mv FrustraEvo_$2 ../output_files',
    )
    launcher = job_dir / 'run.sh'
    launcher.write_text("\n".join(script_lines) + "\n")
    launcher.chmod(0o755)  # rwxr-xr-x, so the script is executable

def run_frustraevo(base_dir: Path, job_id: str, input_dir: Path, params: Dict[str, str]) -> None:
    """Execute the job's ``run.sh`` through ``sudo sh`` and wait for it to finish.

    The script receives, in order: the input directory, the job id, and the
    ``fasta_file``, ``protein_ref`` and ``contact_maps`` entries of ``params``.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero status.
    """
    launcher = base_dir / job_id / 'run.sh'
    script_args = [str(input_dir), job_id]
    script_args += [params[key] for key in ('fasta_file', 'protein_ref', 'contact_maps')]
    subprocess.run(['sudo', 'sh', str(launcher), *script_args], check=True)

def main():
    """Prepare a job folder, gather the inputs, and run FrustraEvo.

    Reads the form values defined earlier in this cell (``job_id``,
    ``run_example``, ``fasta_file``, ``pdb_source``, ``protein_ref``,
    ``contact_maps``). With ``run_example`` set, the example alignment and
    ``pdbs.zip`` are downloaded from ``FRUSTRAEVO_BASE_URL``; otherwise the
    user is prompted to upload the files.

    Errors are printed instead of raised so the cell itself does not fail.

    Returns:
        The name of the job folder created under ``FOLDER_NAME``, or ``None``
        when the run did not complete.
    """
    try:
        # Initialize paths
        base_path = Path(os.getcwd()) / FOLDER_NAME
        job_name = add_hash(job_id, fasta_file)

        # Pick a folder name that is not taken yet: <name>, <name>_0, <name>_1, ...
        # The bare <name> has to be probed as well; otherwise a second run
        # would reuse the folder of the first one.
        job_id_hashed = job_name
        n = 0
        while (base_path / job_id_hashed).exists():
            job_id_hashed = f"{job_name}_{n}"
            n += 1

        # Create directories
        input_dir, output_dir = create_job_directories(base_path, job_id_hashed)

        params = {
            'fasta_file': fasta_file,
            'protein_ref': protein_ref,
            'contact_maps': contact_maps
        }

        if not run_example:
            if fasta_file == 'upload_fasta':
                params['fasta_file'] = handle_file_upload(input_dir, "fasta file", '.fasta')
                if not params['fasta_file']:
                    # An empty name would otherwise be passed on to the tool.
                    raise ValueError("no .fasta file was uploaded")

            if pdb_source == 'upload_zip':
                zip_filename = handle_file_upload(input_dir, "PDB files in a zip format", '.zip')
                if zip_filename:
                    unzip_file(input_dir / zip_filename, input_dir)
                else:
                    print("Warning: no .zip file was uploaded; nothing was extracted.")
            else:
                print(f"Warning: pdb_source='{pdb_source}' is not handled by this cell; "
                      "no structure files were added to the input folder.")
        else:
            # Handle example case
            params.update({
                'fasta_file': 'Alphas.fasta',
                'protein_ref': '3a0g-A'
            })

            print("Downloading example inputs...")
            # Download fasta file
            fasta_url = f"{FRUSTRAEVO_BASE_URL}/{params['fasta_file']}"
            fasta_path = input_dir / params['fasta_file']
            download_file(fasta_url, fasta_path)

            # Download and extract PDB files
            pdbs_url = f"{FRUSTRAEVO_BASE_URL}/pdbs.zip"
            zip_path = input_dir / "pdbs.zip"
            download_file(pdbs_url, zip_path)
            unzip_file(zip_path, input_dir)

        # Create and run bash script
        print("Creating bash script...")
        create_bash_script(base_path / job_id_hashed)

        print("Running bash script...")
        run_frustraevo(base_path, job_id_hashed, input_dir, params)

        print("Your results will be saved in the specified folder.")
        print(f"Output folder: {output_dir}")
        return job_id_hashed

    except urllib.error.URLError as e:
        print(f"Error downloading file: {e}")
    except zipfile.BadZipFile as e:
        print(f"Error extracting zip file: {e}")
    except subprocess.CalledProcessError as e:
        # run.sh redirects the container's stdout to log.txt before changing
        # directory, so the log is written to the current working directory.
        print(f"FrustraEvo run failed (exit status {e.returncode}); "
              "see log.txt in the current working directory.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    return None


# Keep the job folder name at notebook level so follow-up cells can refer to
# this run (None when the run failed).
job_id_hashed = main()

In [None]:
#@title Display interactive tables
import os
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import IPython.display as display
from google.colab import data_table

data_table.enable_dataframe_formatter()

# Locate the output files of the most recently modified run under
# <cwd>/FOLDER_NAME. The layout <job>/output_files/FrustraEvo_<job>/OutPutFiles
# is the one produced by the run cell.
job_output_dirs = sorted(
    (Path(os.getcwd()) / FOLDER_NAME).glob('*/output_files/FrustraEvo_*/OutPutFiles'),
    key=lambda candidate: candidate.stat().st_mtime,
)
if not job_output_dirs:
    raise FileNotFoundError(
        f"No FrustraEvo results found under '{FOLDER_NAME}'. Run the 'Run FrustraEvo' cell first."
    )
output_path = str(job_output_dirs[-1])
print(f"Reading results from: {output_path}")

# Display tables (sorted so the order is the same on every run)
table_files = sorted(f for f in os.listdir(output_path) if f.endswith('.tab'))
for table_file in table_files:
    print(f"Displaying table: {table_file}")
    df = pd.read_csv(os.path.join(output_path, table_file), sep='\t')
    display.display(df)

In [None]:
#@title Display output figures
import os
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import IPython.display as display
from google.colab import data_table

data_table.enable_dataframe_formatter()

# Locate the output files of the most recently modified run under
# <cwd>/FOLDER_NAME. The layout <job>/output_files/FrustraEvo_<job>/OutPutFiles
# is the one produced by the run cell.
job_output_dirs = sorted(
    (Path(os.getcwd()) / FOLDER_NAME).glob('*/output_files/FrustraEvo_*/OutPutFiles'),
    key=lambda candidate: candidate.stat().st_mtime,
)
if not job_output_dirs:
    raise FileNotFoundError(
        f"No FrustraEvo results found under '{FOLDER_NAME}'. Run the 'Run FrustraEvo' cell first."
    )
output_path = str(job_output_dirs[-1])
print(f"Reading results from: {output_path}")

# Display images (sorted so the order is the same on every run)
image_files = sorted(f for f in os.listdir(output_path) if f.endswith('.png'))
for image_file in image_files:
    print(f"Displaying image: {image_file}")
    display.display(display.Image(os.path.join(output_path, image_file)))



