### Test NERVE

In [76]:
import subprocess, os
import pandas as pd

In [47]:
class RunNerve():
    """Class to store nerve parameters and run NERVE program"""
    def __init__(self):
        """Initiate arguments and program path"""
        self.args = {'annotation': True, 'e_value': 1e-10, 'gram': None, 'minlength': 9, 'mismatch': 1,
                     'mouse': True, 'mouse_peptides_sum_limit': .15, 'proteome1': None, 'proteome2': None, 
                     'p_ad_extracellular_filter': .38, 'p_ad_no_citoplasm_filter': .46, 'padlimit': .5, 
                     'razor': True, 'razlen': 50, 'select': True, 'substitution': 3, 
                     'transmemb_doms_limit': 3, 'virlimit': .5, 'virulent': True, 'working_dir': None, 
                     'NERVE_dir': '../../', 'iFeature_dir': '/iFeature', 'DeepFri_dir': '/DeepFri'
                     }
        self.program = "../../code/NERVE.py"

    def print_args(self):
        print(f'Arguments are:\n{self.args}')
        print(f'NERVE executable path is:\n{self.program}')
    
    def run(self):
        """Pass commands to line
        program: command/program to run
        args: arguments"""
        
        program = self.program
        args = self.args
        for key in args:
            args[key] = str(args[key])
        
        args_list = []
        for item in args.items():
            args_list.append(f'--{item[0]}')
            args_list.append(item[1])
        cmd = ['python3'] + [program] + args_list
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        output, error = process.communicate()
        return output, error

In [32]:
# prepare files for tuning
def to_fasta(df, outfile):
    """Converts pandas cells to fasta"""
    outfile = open(outfile, 'w')
    for index, row in df.iterrows():
        outfile.write('>' + row['protein']+ '_' + row['organism name'] + '\n' + row['sequence'].strip() + '\n')
    outfile.close()

df = pd.read_excel('../../database/antigens/test_antigens_summary.xlsx', sheet_name = 'nerve_2_tests')
df = df[df['nerve_train'] == 1]
df_neg = df[df['gram'] == '-']
df_pos = df[df['gram'] == '+']
to_fasta(df_neg, './tuning/gram_neg.fasta')
to_fasta(df_pos, './tuning/gram_pos.fasta')

In [83]:
df[df['gram'] == '+']['Proteome ID'].unique()

array(['UP000001978', 'UP000031961', 'UP000070260', 'UP000032903',
       'UP000217443', 'UP000001584', 'UP000001020', 'UP000007137'],
      dtype=object)

In [88]:
def tuning(padlimit, virlimit, proteomes_neg=[], proteomes_pos=[]):
    """Runs NERVE tuning"""
    outdic = {'n': {},
              'p': {}}
    
    gram_neg = RunNerve()
    gram_neg.args['proteome1'] = './tuning/gram_neg.fasta'
    gram_neg.args['gram'] = 'n'
    gram_neg.args.pop('proteome2')
    working_dir = f'./tuning/gram_neg_padlimit_{str(padlimit)}_virlimit_{str(virlimit)}'
    gram_neg.args['working_dir'] = working_dir
    out, err = gram_neg.run()
    df_vaccines = pd.read_csv(os.path.join(working_dir, 'vaccine_candidates.csv'))
    df_non_vaccines = pd.read_csv(os.path.join(working_dir, 'discarded_proteins.csv'))    
    outdic['n']['gram_neg_recall'] = len(df_vaccines) / (len(df_vaccines) + len(df_non_vaccines))
    
    gram_pos = RunNerve()
    gram_pos.args['proteome1'] = './tuning/gram_pos.fasta'
    gram_pos.args['gram'] = 'p'
    gram_pos.args.pop('proteome2')
    working_dir = f'./tuning/gram_pos_padlimit_{str(padlimit)}_virlimit_{str(virlimit)}'
    gram_pos.args['working_dir'] = working_dir
    out, err = gram_pos.run()
    df_vaccines = pd.read_csv(os.path.join(working_dir, 'vaccine_candidates.csv'))
    df_non_vaccines = pd.read_csv(os.path.join(working_dir, 'discarded_proteins.csv'))    
    outdic['p']['gram_pos_recall'] = len(df_vaccines) / (len(df_vaccines) + len(df_non_vaccines))
    
    for proteome in proteomes_neg:
        prot_run = RunNerve()
        prot_run.args['proteome1'] = proteome
        prot_run.args['gram'] = 'n'
        prot_run.args.pop('proteome2')
        working_dir = f'./tuning/{proteome}_gram_n_padlimit_{str(padlimit)}_virlimit_{str(virlimit)}'
        prot_run.args['working_dir'] = working_dir
        out, err = prot_run.run()
        df_vaccines = pd.read_csv(os.path.join(working_dir, 'vaccine_candidates.csv'))
        df_non_vaccines = pd.read_csv(os.path.join(working_dir, 'discarded_proteins.csv'))    
        outdic['n'][proteome] = len(df_vaccines) / (len(df_vaccines) + len(df_non_vaccines))
     
    for proteome in proteomes_pos:
        prot_run = RunNerve()
        prot_run.args['proteome1'] = proteome
        prot_run.args['gram'] = 'p'
        prot_run.args.pop('proteome2')
        working_dir = f'./tuning/{proteome}_gram_n_padlimit_{str(padlimit)}_virlimit_{str(virlimit)}'
        prot_run.args['working_dir'] = working_dir
        out, err = prot_run.run()
        df_vaccines = pd.read_csv(os.path.join(working_dir, 'vaccine_candidates.csv'))
        df_non_vaccines = pd.read_csv(os.path.join(working_dir, 'discarded_proteins.csv'))    
        outdic['p'][proteome] = len(df_vaccines) / (len(df_vaccines) + len(df_non_vaccines))
    
    return outdic

In [None]:
outdic = tuning(0.5, 0.5, ['UP000000419', 'UP000002676', 'UP000000540'], ['UP000001978', 'UP000031961', 'UP000001584'])
outdic

In [72]:
test = RunNerve()
test.args['proteome1'] = './tuning/gram_neg.fasta'
test.args['gram'] = 'n'
test.args.pop('proteome2')
test.args['working_dir'] = './tuning/gram_neg_test'
test.args['virulent'] = False
test.print_args()
test.run()

Arguments are:
{'annotation': True, 'e_value': 1e-10, 'gram': 'n', 'minlength': 9, 'mismatch': 1, 'mouse': True, 'mouse_peptides_sum_limit': 0.15, 'proteome1': './tuning/gram_neg.fasta', 'p_ad_extracellular_filter': 0.38, 'p_ad_no_citoplasm_filter': 0.46, 'padlimit': 0.5, 'razor': True, 'razlen': 50, 'select': True, 'substitution': 3, 'transmemb_doms_limit': 3, 'virlimit': 0.5, 'virulent': False, 'working_dir': './tuning/gram_neg_test', 'NERVE_dir': '../../', 'iFeature_dir': '/iFeature', 'DeepFri_dir': '/DeepFri'}
NERVE executable path is:
../../code/NERVE.py


(b'Start NERVE 1.5\n10% done\n20% done\n30% done\n40% done\n50% done\n60% done\n70% done\n80% done\n90% done\n100% done\nEnd NERVE computation successfully.\n',
 None)

In [73]:
test = RunNerve()
test.args['proteome1'] = './tuning/gram_pos.fasta'
test.args['gram'] = 'n'
test.args.pop('proteome2')
test.args['working_dir'] = './tuning/gram_pos_test'
test.args['virulent'] = False
test.print_args()
test.run()

Arguments are:
{'annotation': True, 'e_value': 1e-10, 'gram': 'n', 'minlength': 9, 'mismatch': 1, 'mouse': True, 'mouse_peptides_sum_limit': 0.15, 'proteome1': './tuning/gram_pos.fasta', 'p_ad_extracellular_filter': 0.38, 'p_ad_no_citoplasm_filter': 0.46, 'padlimit': 0.5, 'razor': True, 'razlen': 50, 'select': True, 'substitution': 3, 'transmemb_doms_limit': 3, 'virlimit': 0.5, 'virulent': False, 'working_dir': './tuning/gram_pos_test', 'NERVE_dir': '../../', 'iFeature_dir': '/iFeature', 'DeepFri_dir': '/DeepFri'}
NERVE executable path is:
../../code/NERVE.py


(b'Start NERVE 1.5\n10% done\n20% done\n30% done\n40% done\n50% done\n60% done\n70% done\n80% done\n90% done\n100% done\nEnd NERVE computation successfully.\n',
 None)

In [None]:
# to do:
# remove from nerve output useless files
# remove "DeepFri predictions:"
# split program into modules
# remove subcell option