# In [None]:
# prepare_mutations.py
import pandas as pd
import sys

def adjust_mutation_based_on_chain(mutation, chain):
    """Insert a chain identifier into a mutation token, renumbering on chain G.

    The token is read as: first character, an integer in the middle, last
    character (e.g. ``"A75G"``). The result is
    ``<first><chain><number><last>``.

    Args:
        mutation: Mutation token string.
        chain: Chain identifier placed right after the first character.

    Returns:
        The rewritten token. When ``chain`` is ``'G'`` and the parsed number
        is 75 or greater, the number is increased by 2. If the middle part
        of the token is not a valid integer, ``mutation`` is returned
        unchanged.
    """
    try:
        position = int(mutation[1:-1])
    except ValueError:
        # Middle section is not numeric (or the token is too short to have
        # one): pass the token through untouched.
        return mutation

    # NOTE(review): the +2 shift for chain G from residue 75 onward is taken
    # as-is from the original logic; the numbering rationale is not visible
    # in this file.
    shift = 2 if (chain == 'G' and position >= 75) else 0
    first, last = mutation[0], mutation[-1]
    return f"{first}{chain}{position + shift}{last}"

def format_mutations(mut_str):
    """Expand a whitespace-separated mutation string across chains D, E and G.

    Each token in ``mut_str`` is passed to
    ``adjust_mutation_based_on_chain`` once per chain, in the order
    D, E, G, keeping the tokens in their original order.

    Args:
        mut_str: Whitespace-separated mutation tokens.

    Returns:
        The adjusted tokens joined with ``','`` and terminated by ``';'``,
        or ``None`` when ``mut_str`` is falsy (e.g. ``""`` or ``None``).
        A string containing only whitespace yields ``";"``.
    """
    if not mut_str:
        return None

    expanded = [
        adjust_mutation_based_on_chain(token, chain_id)
        for token in mut_str.split()
        for chain_id in ('D', 'E', 'G')
    ]
    return f"{','.join(expanded)};"

def main(input_file):
    """Write one formatted mutation file per barcode found in a CSV.

    Reads the ``barcode`` and ``aa_substitutions`` columns from
    ``input_file``, drops rows whose ``aa_substitutions`` is missing, and for
    each remaining row writes the output of ``format_mutations`` to
    ``data/<barcode>_mutations.txt`` (relative to the current working
    directory). An existing file with the same name is overwritten.

    Args:
        input_file: Path (or anything ``pandas.read_csv`` accepts) of the
            CSV to read.
    """
    import os  # local import so this block does not depend on file-level edits

    df = pd.read_csv(input_file, usecols=['barcode', 'aa_substitutions'])
    df = df.dropna(subset=['aa_substitutions'])  # Ensure there are mutations to process

    # The output directory may not exist on a fresh checkout; without this
    # the first open() below fails with FileNotFoundError.
    os.makedirs('data', exist_ok=True)

    # Only two columns are needed, so iterate them directly instead of
    # building a Series per row with iterrows().
    for barcode, substitutions in zip(df['barcode'], df['aa_substitutions']):
        formatted_mutations = format_mutations(substitutions)
        if formatted_mutations is None:
            # format_mutations signals "nothing to write" with None;
            # writing it would raise TypeError.
            continue
        with open(f'data/{barcode}_mutations.txt', 'w') as f:
            f.write(formatted_mutations)

if __name__ == "__main__":
    # Exit with a usage message (status 1) instead of an IndexError traceback
    # when the input CSV path is missing. Extra arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit("usage: python prepare_mutations.py <input_csv>")
    main(sys.argv[1])
