### Usage tutorial

In [3]:
# print available options
!/NERVE/code/NERVE.py -h

usage: NERVE.py [-h] [-a ] [-ev ] -g  [-ml ] [-mm ] [-m ] [-mpsl ] -p1
                 [-p2 ] [-paefilter ] [-pacfilter ] [-pl ] [-rz ]
                [-rl ] [-s ] [-ss ] [-tdl ] [-vl ] [-vir ] [-wd ]
                [-nd ] [-id ] [-dfd ]

Run vaccine candidate prediction

optional arguments:
  -h, --help            show this help message and exit
  -a , --annotation   Activation (True) or deactivation (False) of
                        annotation module. Uses DeepFri to retrieve protein
                        functional onthologies (default: True)
  -ev , --e_value     Expect-value used in blastp for immunity modules
                        (default: 1e-10)
  -g , --gram         Negative (n) or positive (p) gram stain of the
                        pathogen of interest (default: None)
  -ml , --minlength   Minimal length required for shared peptides to be
                        extracted in comparison analyses versus human and/or
  

In [5]:
# run on Neisseria meningitidis serogroup B (strain MC58) (MC58) (uniprot id: UP000000425)
# note that it is essential to specify the gram type (-g option)
!/NERVE/code/NERVE.py -p1 UP000002493 -p2 UP000002493 -wd ./UP000002493 -g n

Start NERVE 1.5
10% done
20% done
30% done
40% done
50% done
60% done
70% done
80% done
90% done
100% done
End NERVE computation successfully.


In [6]:
# explore the results
import os
os.listdir("./UP000002493/")

['proteome1.fasta',
 'cleaned_proteome1.fasta',
 'logfile.log',
 'vaccine_candidates.csv',
 'discarded_sequences_proteome1.fasta',
 'discarded_proteins.csv']

In [7]:
# vaccine_candidates.csv contains vaccine candidates extracted from the input proteome with the select module
# while discarded_proteins.csv contains excluded proteins
import pandas as pd
vaccine_candidates=pd.read_csv("./UP000002493/vaccine_candidates.csv")
vaccine_candidates

Unnamed: 0,id,uniprot_accession_code,score,length,transmembrane_doms,localization,localization score,virulence_probability,adhesin_probability,conservation_score,...,shared_mouse_peps,shared_conserv_proteome_peps,human_peptides_sum,mouse_peptides_sum,annotations,list_of_peptides_from_comparison_with_mhcpep_sapiens,list_of_peptides_from_comparison_with_mhcpep_mouse,sequence,original_sequence_if_razor,tmhmm_seq
0,tr|Q87GL4|Q87GL4_VIBPA Pentapeptide repeat-con...,Q87GL4,0.9813,682,0,Extracellular,9.65,0.9445,0.9592,,...,0,0,0.0000,0.0000,,,,MKSIASYQIKFKVLFTLTCSCIFATACNSDNTSTEIQSKLLVEKDF...,,
1,tr|Q87HS4|Q87HS4_VIBPA Heme transport protein ...,Q87HS4,0.9799,693,0,OuterMembrane,10.00,0.9315,0.9282,,...,0,0,0.0000,0.0000,,,,MYNKSILSASILIALSQGAYAEDHSTFNEVVVTATRTNSQIEDTAA...,,
2,tr|Q87IP5|Q87IP5_VIBPA Alginate_exp domain-con...,Q87IP5,0.9789,405,0,OuterMembrane,9.49,0.9344,0.9686,,...,0,0,0.0000,0.0000,,,,MHSGLHIRFSPPWMVVVSQALSLSLLGFFSFCAVANIPGLQPQKSW...,,
3,tr|Q87JI6|Q87JI6_VIBPA Flagellar hook protein ...,Q87JI6,0.9743,398,0,Extracellular,10.00,0.8660,0.9540,,...,0,0,0.0000,0.0000,,,,MSFNIALSGLDATNTELNTISHNIANASTYGFKGARTEFAAVYNGM...,,
4,tr|Q87GE5|Q87GE5_VIBPA Type III effector HopI1...,Q87GE5,0.9742,484,0,Extracellular,9.72,0.9644,0.8827,,...,0,0,0.0000,0.0000,,,,MLKIKLPQQTSLAPSSETTQRLPVKISIKSICNKSICKTLHSLADK...,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1678,tr|Q87J15|Q87J15_VIBPA ribonuclease H OS=Vibri...,Q87J15,0.5034,169,0,Unknown,0.00,0.9290,0.0152,,...,2,0,0.1065,0.1065,,,,MNTQLQTTHLLHNAQHPLSIYCDGSAPDNQHGCLQGGVGIAVYDAL...,,
1679,tr|Q87M94|Q87M94_VIBPA 4-methyl-5(B-hydroxyeth...,Q87M94,0.4849,199,0,Periplasmic,9.51,0.1114,0.0067,,...,9,0,0.1256,0.1256,,,,MSKKILVPIAPGTEEMEAVTVIDLMVRAGYDVTVASAAFDGALTMK...,,
1680,sp|Q87KZ9|PSD_VIBPA Phosphatidylserine decarbo...,Q87KZ9,0.4835,285,0,CytoplasmicMembrane,7.88,0.2315,0.0492,,...,4,0,0.1263,0.1263,,,,MDKIKVGLQYWIPQHGLTRLVGKLASAKAGSLTTAVIRWFIKQYNV...,,
1681,tr|Q87T63|Q87T63_VIBPA Putative oxidoreductase...,Q87T63,0.4422,253,0,Unknown,0.00,0.8465,0.0673,,...,8,0,0.1462,0.1265,,,,MLKDKKIVIAGAGGLLGASVVKSILEAGGSVVATDVSLEHLKARLS...,,


In [8]:
# 264 proteins have been selected as potential vaccine candidates.
discarded_proteins=pd.read_csv("./UP000000425/discarded_proteins.csv")
discarded_proteins

Unnamed: 0.1,Unnamed: 0,id,uniprot_accession_code,length,transmembrane_doms,localization,localization score,virulence_probability,adhesin_probability,conservation_score,...,list_of_shared_mouse_peps,list_of_shared_conserv_proteome_peps,human_peptides_sum,mouse_peptides_sum,annotations,list_of_peptides_from_comparison_with_mhcpep_sapiens,list_of_peptides_from_comparison_with_mhcpep_mouse,sequence,original_sequence_if_razor,tmhmm_seq
0,0,sp|P72097|LST_NEIMB N-acetyllactosaminide alph...,P72097,371,1,Unknown,0.00,0.4272,0.6133,,...,0,0,0.0,0.0,,,,MGLKKACLTVLCLIVFCFGIFYTFDRVNQGERNAVSLLKEKLFNEE...,,iiiiiiMMMMMMMMMMMMMMMMMMOOOOOOOOOOOOOOOOOOOOOO...
1,1,sp|P95370|CYSG_NEIMB Siroheme synthase OS=Neis...,P95370,483,0,CytoplasmicMembrane,8.46,0.2621,0.3837,,...,0,0,0.0,0.0,"DeepFri predictions: transferase activity, tra...",,,MNYFPIFANLAGRPVLVVGGGAVAARKISLLLKAGAEVRVAAKHLN...,,
2,2,sp|Q51161|DHPS_NEIMB Dihydropteroate synthase ...,Q51161,285,0,Cytoplasmic,9.97,0.1428,0.0122,,...,0,0,0.0,0.0,DeepFri predictions:,,,MARHVWQAGRFEIGLDKPKIMGIVNLTPDSFSDGGVYSQNAQTALA...,,
3,3,sp|Q7DDR9|NMFIC_NEIMB Protein adenylyltransfer...,Q7DDR9,191,0,Cytoplasmic,8.96,0.1531,0.0114,,...,0,0,0.0,0.0,DeepFri predictions:,,,MPSENPIGKTMKSIDEQSLHNARRLFESGDIDRIEVGTTAGLQQIH...,,
4,4,sp|P0A0S8|FUR_NEIMB Ferric uptake regulation p...,P0A0S8,144,0,Cytoplasmic,9.97,0.1214,0.0607,,...,0,0,0.0,0.0,DeepFri predictions:,,,MEKFNNIAQLKDSGLKVTGPRLKILDLFETHAEEHLSAEDVYRILL...,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1731,1731,tr|Q9K1Q7|Q9K1Q7_NEIMB Integral membrane prote...,Q9K1Q7,149,4,CytoplasmicMembrane,10.00,0.1609,0.0086,,...,0,0,0.0,0.0,DeepFri predictions:,,,MSIYAVAHIVHLYCAIAFVGGVFFEVLVLSVLHTGRVSREARREVE...,,oooooooooMMMMMMMMMMMMMMMMMMMMMMMiiiiiiiiiiiiii...
1732,1732,tr|Q9K1Q8|Q9K1Q8_NEIMB TerC family protein OS=...,Q9K1Q8,323,9,CytoplasmicMembrane,10.00,0.2476,0.0213,,...,0,0,0.0,0.0,DeepFri predictions:,,,MTEYPGIGSPLFYGVFFAAVLVMIALDMFSLKKNGSHKVGVKEALA...,,oooooooooMMMMMMMMMMMMMMMMMMMMMMMiiiiiiiiiiiiMM...
1733,1733,tr|Q9K1R1|Q9K1R1_NEIMB BolA/YrbA family protei...,Q9K1R1,82,0,Unknown,0.00,0.1321,0.0115,,...,0,0,0.0,0.0,DeepFri predictions:,,,MLTSEQVKAMIEGVAKCEHIEVEGDGHHFFAVIVSSEFEGKARLAR...,,
1734,1734,tr|Q9K1R5|Q9K1R5_NEIMB EpiH/GdmH-related prote...,Q9K1R5,214,1,Unknown,0.00,0.6686,0.0936,,...,0,0,0.0,0.0,DeepFri predictions:,,,MNKRLFCSRNGLRYYLLGGFCLSVFPLLLVFASSVWAVYRTGGQVL...,,iiiiiiiiiiiiiiiMMMMMMMMMMMMMMMMMMMMMMMOOOOOOOO...


In [2]:
# 1735 proteins have been discarded.