In [None]:
import sys
addpath = "/home/newloci/tandem"
sys.path.append(addpath)
from tandem.src.stand_alone_consurf.consurf import determine_mode, get_form_direct, extract_data_from_model, no_MSA, extract_data_from_MSA, find_best_substitution_model
from tandem.src.stand_alone_consurf.consurf import run_rate4site_old, assign_colors_according_to_r4s_layers, write_MSA_percentage_file, consurf_create_output
from tandem.src.stand_alone_consurf.consurf import conseq_create_output, zip_all_outputs
from tandem.src.stand_alone_consurf.consurf import get_query_seq_in_MSA, reveal_buried_exposed
import os, re
import GENERAL_CONSTANTS
from datetime import datetime
from datetime import date
import time
from prody import LOGGER

# Start ProDy's LOGGER on "consurf.log".
LOGGER.start("consurf.log")

# Inputs for this run, collected in one mapping and handed to
# get_form_direct as keyword arguments; it returns the (form, vars) pair
# that every later step reads and updates.
_direct_inputs = dict(
    query="P69905",
    structure="4xr8.pdb",
    chain="A",
    DB="/home/newloci/alphafold3/db/uniref90_2022_05.fa",
    work_dir=".",
    algorithm="HMMER",
)
form, vars = get_form_direct(**_direct_inputs)


# Fixed file names, registered before determine_mode() is called.
for _key, _file_name in (
    ('run_log', "log.txt"),
    ('tree_file', "TheTree.txt"),
    ('msa_fasta', "msa_fasta.aln"),
):
    vars[_key] = _file_name

vars['running_mode'] = determine_mode(form, vars)
# Resolved before the chdir below, i.e. relative to the directory that was
# current when this cell started.
vars['script_dir'] = os.path.abspath(".")

# All remaining relative file names are interpreted inside the working dir.
os.chdir(vars['working_dir'])
vars['BLAST_out_file'] = "sequences_found.txt"

# "AA" selects the protein settings; anything else is treated as nucleotides.
_is_protein = form['DNA_AA'] == "AA"
vars['protein_or_nucleotide'] = "proteins" if _is_protein else "nucleotides"
vars['Msa_percentageFILE'] = (
    "msa_aa_variety_percentage.csv"
    if _is_protein
    else "msa_nucleic_acids_variety_percentage.csv"
)

vars['All_Outputs_Zip'] = "Consurf_Outputs.zip"
if form['pdb_FILE'] is None:
    # ConSeq mode: no model was supplied.
    vars['Used_PDB_Name'] = "no_model"
else:
    # User PDB: keep only the last path component ...
    last_component = re.search(r'([^\/]+)$', form['pdb_FILE'])
    if last_component is not None:
        vars['Used_PDB_Name'] = last_component.group(1)
    # ... replace parentheses and spaces with underscores ...
    vars['Used_PDB_Name'] = re.sub(r"[() ]", r"_", vars['Used_PDB_Name'])
    # ... and drop everything from the last dot onwards (the extension).
    before_last_dot = re.search(r'(\S+)\.', vars['Used_PDB_Name'])
    if before_last_dot is not None:
        vars['Used_PDB_Name'] = before_last_dot.group(1)


# A non-zero integer E-value N is rewritten as a decimal string made of
# "0.", N-1 zeros and a final "1" -- e.g. "3" becomes "0.001".
if form['E_VALUE'].isdigit() and form['E_VALUE'] != "0":
    decimal_places = int(form['E_VALUE'])
    form['E_VALUE'] = "0." + "0" * (decimal_places - 1) + "1"


# Redundancy cutoff for homolog filtering, capped just below 100.
# The setting is parsed once as a float: the stored value is a float anyway,
# and int() raises ValueError on fractional strings such as "99.5".
_max_redundancy = float(form['MAX_REDUNDANCY'])
if _max_redundancy >= 100:
    vars['hit_redundancy'] = 99.999999
else:
    vars['hit_redundancy'] = _max_redundancy


form['Run_Number'] = "0"

# Homolog thresholds, read from the GENERAL_CONSTANTS module.
vars['hit_min_length'] = GENERAL_CONSTANTS.FRAGMENT_MINIMUM_LENGTH  # minimum length of homologs
vars['min_num_of_hits'] = GENERAL_CONSTANTS.MINIMUM_FRAGMENTS_FOR_MSA  # minimum number of homologs

# Final homologs used for creating the MSA, and the HTML file that shows
# them to the user.
vars['FINAL_sequences'] = "query_final_homolougs.fasta"
vars['FINAL_sequences_html'] = "query_final_homolougs.html"

# Timestamps for this run.
vars['submission_time'] = str(datetime.now())
vars['date'] = date.today().strftime("%d/%m/%Y")
vars['time_table'] = []
vars['current_time'] = time.time()

# File with the ConSurf output grades, named after the model in use.
vars['gradesPE'] = f"{vars['Used_PDB_Name']}_consurf_grades.txt"

# Starts empty; output file names are added to it later.
vars['zip_list'] = []

# PyMOL and ChimeraX colouring scripts, located in vars['script_dir'].
# script_dir is produced by os.path.abspath(), whose result does not end with
# a path separator (except for the filesystem root), so the file names must be
# joined onto it. Plain "+" concatenation fused the directory and file name
# into a single wrong path such as "/some/dircolor_consurf_...".
vars['chimera_color_script'] = os.path.join(vars['script_dir'], "color_consurf_chimerax_session.py")
vars['chimera_color_script_CBS'] = os.path.join(vars['script_dir'], "color_consurf_CBS_chimerax_session.py")
vars['pymol_color_script_isd'] = os.path.join(vars['script_dir'], "color_consurf_pymol_isd_session.py")
vars['pymol_color_script_CBS_isd'] = os.path.join(vars['script_dir'], "color_consurf_CBS_pymol_isd_session.py")

# If the MSA file is not in clustal format, a clustal copy is created here.
vars['msa_clustal'] = "msa_clustal.aln"

# Coloured-sequence PDF outputs.
vars['Colored_Seq_PDF'] = "consurf_colored_seq.pdf"
vars['Colored_Seq_CBS_PDF'] = "consurf_colored_seq_CBS.pdf"

# Holds everything that should be printed to gradesPE: one hash per line
# of r4s.res, with the fields
#   POS   : position of that aa in the sequence
#   SEQ   : aa in one letter
#   GRADE : the given grade from the r4s output
#   COLOR : grade according to consurf's scale
vars['gradesPE_Output'] = []

# Register the output files for the final archive, in this order.
vars['zip_list'].extend(
    vars[_output_key]
    for _output_key in (
        'tree_file',
        'gradesPE',
        'Msa_percentageFILE',
        'Colored_Seq_PDF',
        'Colored_Seq_CBS_PDF',
        'msa_fasta',
    )
)

## mode : include pdb

# create a pdbParser, to get various info from the pdb file
if vars['running_mode'] in ("_mode_pdb_no_msa", "_mode_pdb_msa", "_mode_pdb_msa_tree"):
    extract_data_from_model(form, vars)

## mode : only protein sequence
# Disabled step: for "_mode_no_pdb_no_msa" (only a protein sequence) the
# sequence used to be uploaded via upload_protein_sequence().

## mode : no msa - with PDB or without PDB
if vars['running_mode'] in ("_mode_pdb_no_msa", "_mode_no_pdb_no_msa"):
    no_MSA(form, vars)
## mode : include msa
elif vars['running_mode'] in (
    "_mode_pdb_msa",
    "_mode_msa",
    "_mode_pdb_msa_tree",
    "_mode_msa_tree",
):
    extract_data_from_MSA(form, vars)

# Only the "BEST" setting triggers a search for the substitution model;
# any other value is recorded as a model chosen up front.
if form['SUB_MATRIX'] != "BEST":
    vars['best_fit'] = "model_chosen"
else:
    find_best_substitution_model(form, vars)

# Run rate4site, assign colours from its layers, then write the MSA
# percentage file -- in that order, each step receiving (form, vars).
for _step in (
    run_rate4site_old,
    assign_colors_according_to_r4s_layers,
    write_MSA_percentage_file,
):
    _step(form, vars)

## mode : include pdb
if vars['running_mode'] in ("_mode_pdb_no_msa", "_mode_pdb_msa", "_mode_pdb_msa_tree"):
    consurf_create_output(form, vars)

## mode : ConSeq - NO PDB
if vars['running_mode'] in ("_mode_msa", "_mode_no_pdb_no_msa", "_mode_msa_tree"):
    conseq_create_output(form, vars)

# Called in every mode, after the mode-specific output step.
zip_all_outputs(vars)

ModuleNotFoundError: No module named 'fpdf'

In [None]:
# `run` is imported through the package path. The alternative that was tried
# here -- appending "/home/newloci/tandem/src/stand_alone_consurf" to sys.path
# and doing `from main import run` -- is left disabled.
from src.stand_alone_consurf.main import run

# Same query, chain, database and algorithm as the step-by-step cell, but with
# an absolute structure path and a dedicated working directory.
_run_settings = {
    "query": "P69905",
    "structure": "/home/newloci/tandem/4xr8.pdb",
    "chain": "A",
    "DB": "/home/newloci/alphafold3/db/uniref90_2022_05.fa",
    "work_dir": "P69905",
    "algorithm": "HMMER",
}
consurfile = run(**_run_settings)

