In [None]:
import pandas as pd
import subprocess
import logging
from concurrent.futures import ProcessPoolExecutor, as_completed
import os

# Root logger: INFO and above, each record prefixed with a timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Path to the FoldX executable that run_foldx() launches, given relative to
# the working directory.
foldx_path = 'foldxMacC11_0/FoldX'

def adjust_mutation_based_on_chain(mutation, chain):
    """Insert a chain ID into a mutation code, renumbering on chain G.

    ``mutation`` is read as <first char><integer position><last char>. The
    result is <first char><chain><position><last char>. For chain 'G',
    positions at or above 75 are increased by 2.

    Args:
        mutation: Mutation code such as ``"N501Y"``.
        chain: Chain identifier placed after the first character.

    Returns:
        The chain-qualified mutation string. If the middle of ``mutation``
        is not an integer, an error is logged and ``mutation`` is returned
        unchanged (without a chain ID).
    """
    try:
        position = int(mutation[1:-1])
    except ValueError:
        logging.error(f"Error processing mutation: {mutation}")
        return mutation

    # NOTE(review): the +2 shift for chain G from residue 75 onward presumably
    # compensates for a numbering offset in the structure file -- confirm.
    shift = 2 if (chain == 'G' and position >= 75) else 0
    original_residue, new_residue = mutation[0], mutation[-1]
    return f"{original_residue}{chain}{position + shift}{new_residue}"

def format_mutations(mut_str):
    """Build one mutant-list line from a whitespace-separated mutation string.

    Every token in ``mut_str`` is expanded once per chain in ('D', 'E', 'G')
    through ``adjust_mutation_based_on_chain``. The expanded codes are joined
    with commas and the line is terminated with ';'.

    Args:
        mut_str: Whitespace-separated mutation codes, e.g. ``"N501Y K417N"``.

    Returns:
        The formatted line, or ``None`` when there is nothing to format:
        ``mut_str`` is not a string (``None``, or the float NaN that pandas
        uses for a missing cell), is empty, or contains only whitespace.
    """
    # NaN is truthy and has no .split(), so a plain truthiness test is not
    # enough to reject missing values.
    if not isinstance(mut_str, str):
        return None

    tokens = mut_str.split()
    # Whitespace-only input would otherwise produce a bare ";" line.
    if not tokens:
        return None

    mutations = [
        adjust_mutation_based_on_chain(token, chain)
        for token in tokens
        for chain in ('D', 'E', 'G')
    ]
    return ','.join(mutations) + ';'

def run_foldx(mutations, barcode):
    """Run FoldX BuildModel for one variant and parse a value from its output.

    Writes ``mutations`` to ``individual_list_<barcode>.txt``, runs the
    executable at ``foldx_path`` with ``--command=BuildModel`` against
    ``7we7_spike.pdb``, then scans ``Dif_<barcode>_7we7_spike.fxout`` for the
    first line starting with ``7we7_spike_1``.

    Args:
        mutations: Text written to the mutant file. ``None`` or an empty
            string means there is nothing to model and FoldX is not run.
        barcode: Variant identifier; used in the file names and passed to
            FoldX as ``--output-file``.

    Returns:
        A ``(barcode, value)`` tuple where ``value`` is one of:
        the third whitespace-separated field of the matched line (a str);
        the string ``"Output file not found"`` if FoldX exited with code 0
        but the expected file is missing; or ``None`` if there were no
        mutations, FoldX exited non-zero, or no usable line was found.
    """
    # Writing None to the file raises TypeError, which would surface from
    # future.result() in the caller and abort the whole batch.
    if not mutations:
        logging.warning(f"No mutations to model for {barcode}; skipping FoldX")
        return barcode, None

    individual_list_filename = f'individual_list_{barcode}.txt'
    output_filename = f'Dif_{barcode}_7we7_spike.fxout'
    with open(individual_list_filename, 'w') as file:
        file.write(mutations)

    command = [
        foldx_path,
        '--command=BuildModel',
        '--pdb=7we7_spike.pdb',
        '--mutant-file=' + individual_list_filename,
        '--output-file=' + barcode,
        '--out-pdb=false',
        '--numberOfRuns=1',
    ]
    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode != 0:
        logging.error(f"FoldX error for {barcode}: {result.stderr}")
        return barcode, None

    # Check that the output file exists before attempting to read it.
    if not os.path.exists(output_filename):
        logging.error(f"Expected output file not found: {output_filename}")
        return barcode, "Output file not found"

    with open(output_filename, 'r') as file:
        for line in file:
            if not line.startswith('7we7_spike_1'):
                continue
            parts = line.split()
            # NOTE(review): the caller labels this value 'Total Energy';
            # confirm that index 2 is the total-energy column of this file.
            if len(parts) > 2:
                return barcode, parts[2]
            # A truncated line used to raise IndexError; log and keep scanning.
            logging.error(f"Malformed result line for {barcode}: {line.strip()}")

    # No matching line in the output file.
    return barcode, None

def process_mutations(df):
    """Run ``run_foldx`` for every row of ``df`` in a process pool.

    For each row, the 'aa_substitutions' value is formatted with
    ``format_mutations`` in the calling process and submitted together with
    the row's 'barcode'. Results are collected in completion order, not row
    order, and a progress line is printed per finished job.

    Args:
        df: DataFrame with 'aa_substitutions' and 'barcode' columns.

    Returns:
        List of ``(barcode, energy)`` tuples as returned by ``run_foldx``.
    """
    n_variants = len(df)
    collected = []
    with ProcessPoolExecutor() as pool:
        # Map each future to its barcode; all jobs are queued before any
        # result is awaited.
        pending = {}
        for _, record in df.iterrows():
            mutant_line = format_mutations(record['aa_substitutions'])
            job = pool.submit(run_foldx, mutant_line, record['barcode'])
            pending[job] = record['barcode']

        finished = 0
        for job in as_completed(pending):
            finished += 1
            barcode, energy = job.result()
            collected.append((barcode, energy))
            print(f"Finished processing {barcode} ({finished}/{n_variants})")
    return collected

def main():
    """Load the variant table, run FoldX on the selected rows, save results.

    Reads 'kw_dms/foldX test/codon_variants.csv', keeps rows with
    ``n_aa_substitutions > 0`` in library 'LibB', runs ``process_mutations``
    on them and writes the (Barcode, Total Energy) pairs to
    'foldx_results.csv'.
    """
    # Load data
    df = pd.read_csv(
        'kw_dms/foldX test/codon_variants.csv',
        usecols=['barcode', 'library', 'aa_substitutions', 'n_aa_substitutions'],
    )
    # DataFrame.query returns a new frame rather than filtering in place, so
    # the result must be assigned or every row in the file gets processed.
    df = df.query("n_aa_substitutions > 0 and library == 'LibB'")

    # Process mutations
    results = process_mutations(df)
    results_df = pd.DataFrame(results, columns=['Barcode', 'Total Energy'])

    # Write results to a new CSV file
    results_df.to_csv('foldx_results.csv', index=False)
    logging.info("Results have been written to foldx_results.csv")

# Run the pipeline only when this file is executed as a script, not when it
# is imported. NOTE: process-pool workers started with the "spawn" method
# import this module again, so the guard also keeps them from re-running main().
if __name__ == '__main__':
    main()
