merge main

pyMBE-dev · Jun 4, 2024 · 288b99d · 288b99d
2 parents afbfb2a + 55ea6c8
commit 288b99d
Show file tree

Hide file tree

Showing 22 changed files with 1,492 additions and 469 deletions.
diff --git a/AUTHORS.md b/AUTHORS.md
@@ -1,12 +1,14 @@
 pyMBE is maintained and developed by several authors from different institutions in collaboration.
 The following people have contributed to pyMBE:
 
-## Core developers
-- David Beyer (University of Stuttgart, Germany)
+## Admins
 - Pablo M. Blanco (Norwegian University of Science and Tecnology, Norway)
 - Jean-Noël Grad (University of Stuttgart, Germany)
-- Sebastian P. Pineda (Charles University, Czech Republic)
 - Peter Košovan (Charles University, Czech Republic)
+
+## Core developers
+- David Beyer (University of Stuttgart, Germany)
+- Sebastian P. Pineda (Charles University, Czech Republic)
 - Paola B. Torres (Universidad Tecnologica Nacional, Argentina)
 
 ## Contributors

diff --git a/Makefile b/Makefile
@@ -25,11 +25,15 @@ unit_tests:
 	${PYTHON} testsuite/set_particle_acidity_test.py
 	${PYTHON} testsuite/bond_tests.py
 	${PYTHON} testsuite/generate_perpendicular_vectors_test.py
+	${PYTHON} testsuite/define_and_create_molecules_unit_tests.py
 	${PYTHON} testsuite/create_molecule_position_test.py
 	${PYTHON} testsuite/seed_test.py
 	${PYTHON} testsuite/read-write-df_test.py
 	${PYTHON} testsuite/parameter_test.py
 	${PYTHON} testsuite/henderson_hasselbalch_tests.py
+	${PYTHON} testsuite/calculate_net_charge_unit_test.py
+	${PYTHON} testsuite/setup_salt_ions_unit_tests.py
+	${PYTHON} testsuite/globular_protein_unit_tests.py
 	${PYTHON} testsuite/analysis_tests.py
 
 functional_tests:

diff --git a/lib/create_cg_from_pdb.py b/lib/create_cg_from_pdb.py
@@ -279,10 +279,10 @@ def create_sidechain_beads  (pdb_df) :
     pdb_df = pdb_df.loc[(pdb_df.atom_name != 'O') & (pdb_df.atom_name != 'C') & \
                 (pdb_df.atom_name != 'N') & (pdb_df.atom_name != 'CA') & (pdb_df.residue_name != 'GLY') & (pdb_df.atom_name != 'ca')  ]
 
-    pdb_df['sum_x_cm'] = pdb_df.groupby(['residue_number'])['x_cm'].transform(sum)
-    pdb_df['sum_y_cm'] = pdb_df.groupby(['residue_number'])['y_cm'].transform(sum)
-    pdb_df['sum_z_cm'] = pdb_df.groupby(['residue_number'])['z_cm'].transform(sum)
-    pdb_df['sum_mass'] = pdb_df.groupby(['residue_number'])['mass'].transform(sum)
+    pdb_df['sum_x_cm'] = pdb_df.groupby(['residue_number'])['x_cm'].transform("sum")
+    pdb_df['sum_y_cm'] = pdb_df.groupby(['residue_number'])['y_cm'].transform("sum")
+    pdb_df['sum_z_cm'] = pdb_df.groupby(['residue_number'])['z_cm'].transform("sum")
+    pdb_df['sum_mass'] = pdb_df.groupby(['residue_number'])['mass'].transform("sum")
     pdb_df['sum_res'] = pdb_df.groupby('residue_number')['residue_number'].transform('count')
 
     pdb_df['x_pos'] = pdb_df ['sum_x_cm']/ pdb_df['sum_mass']
@@ -298,7 +298,7 @@ def create_sidechain_beads  (pdb_df) :
     cols_to_sum = ['delta_x','delta_y','delta_z']
 
     pdb_df['sum_rg'] = pdb_df[cols_to_sum].sum(axis=1)
-    pdb_df['sum_rg'] = pdb_df.groupby(['residue_number'])['sum_rg'].transform(sum)
+    pdb_df['sum_rg'] = pdb_df.groupby(['residue_number'])['sum_rg'].transform("sum")
     pdb_df['radius_r'] = np.sqrt(pdb_df['sum_rg']/pdb_df['sum_res'],dtype='float').round(4)
 
     pdb_df = pdb_df.drop_duplicates(subset='residue_number')
@@ -310,7 +310,7 @@ def create_sidechain_beads  (pdb_df) :
     y_coord_r = pdb_df['y_pos']
     z_coord_r = pdb_df['z_pos']
 
-    pdb_df['radius_mean'] = pdb_df.groupby(['residue_name'])['radius_r'].transform (np.mean).round(4)
+    pdb_df['radius_mean'] = pdb_df.groupby(['residue_name'])['radius_r'].transform ("mean").round(4)
 
     atom_number_pdb_r = pd.Series(pdb_df.residue_number)
     resname_r = pd.Series(pdb_df.residue_name)
@@ -332,14 +332,12 @@ def check_sidechain_radius (pdb_df):
     Args:
         pdb_df (cls): pandas df with the protein information.
     """
-
-    for bead in pdb_df.residue_name.keys():
-        if pdb_df.sum_res[bead] == 1:
-            if pdb_df.residue_name[bead] == 'ALA':
-                pdb_df.radius_r [bead] = 1.0
-            else:
-                verbose_print (message=f'{pdb_df.residue_name[bead]} from chain {pdb_df.chain_id[bead]} has missing atoms.', verbose=args.verbose)
-                pdb_df.radius_r [bead] = 1.0
+    for index in pdb_df[pdb_df['sum_res']==1].index:
+        if pdb_df.loc[index, "residue_name"] == "ALA":
+            pdb_df.loc[index, "radius_r"] = 1
+        else:
+            verbose_print (message=f'{pdb_df.loc[index, "residue_name"]} from chain {pdb_df.loc[index, "chain_id"]} has missing atoms.', verbose=args.verbose)
+            pdb_df.loc[index, "radius_r"] = 1
     return 0
 
 def merge_alpha_carbon_and_sidechain (alpha_carbons_and_terminals,residues_bead):

diff --git a/pyMBE.py b/pyMBE.py
diff --git a/samples/Beyer2024/create_paper_data.py b/samples/Beyer2024/create_paper_data.py
@@ -56,38 +56,21 @@
         print(subprocess.list2cmdline(run_command))
         subprocess.check_output(run_command)
 
-
-
 ## Protein plots (Fig. 8)
-
 labels_fig8=["8a", "8b"]
 
 if fig_label in labels_fig8:
-
     script_path=pmb.get_resource("samples/Beyer2024/globular_protein.py")
     pH_range = np.linspace(2, 7, num=11)
     run_command_common=[sys.executable, script_path, "--mode", mode, "--no_verbose"]
-
-    if fig_label == "8a":
-
-        protein_pdb = "1f6s"
-        path_to_cg = f"parameters/globular_proteins/{protein_pdb}.vtf"
-        for pH in pH_range:
-
-            run_command=run_command_common + ["--pH", str(pH),"--pdb", protein_pdb, "--path_to_cg", path_to_cg, "--metal_ion_name", "Ca", "--metal_ion_charge", str(2)]
-            print(subprocess.list2cmdline(run_command))
-            subprocess.check_output(run_command)
-
-    elif fig_label == "8b":
-        protein_pdb = "1beb"
-        path_to_cg = f"parameters/globular_proteins/{protein_pdb}.vtf"
-        for pH in pH_range:
-            run_command=run_command_common + ["--pH", str(pH),"--pdb", protein_pdb, "--path_to_cg", path_to_cg]
-            print(subprocess.list2cmdline(run_command))
-            subprocess.check_output(run_command)
-    else:
-        raise RuntimeError()
-
+    pdb_codes={"8a":"1f6s",
+               "8b": "1beb"}
+    protein_pdb=pdb_codes[fig_label]
+    path_to_cg = f"parameters/globular_proteins/{protein_pdb}.vtf"
+    for pH in pH_range:        
+        run_command=run_command_common + ["--pH", str(pH),"--pdb", protein_pdb, "--path_to_cg", path_to_cg]
+        print(subprocess.list2cmdline(run_command))
+        subprocess.check_output(run_command)   
 
 ## Weak polyelectrolyte dialysis plot (Fig. 9)
 if fig_label == "9": 
@@ -156,10 +139,9 @@
     elif fig_label in ["7c", "8a", "8b"]:
         pka_path=pmb.get_resource("parameters/pka_sets/Nozaki1967.json")
         pmb.load_pka_set (filename=pka_path)
-        # FIXME: this is only necessary due to an undesired feature in calculate_HH
-        # that forces to have all particles defined in pyMBE
-        par_path=pmb.get_resource("parameters/peptides/Blanco2021.json")
-        pmb.load_interaction_parameters(par_path)
+        if fig_label == "7c":
+            par_path=pmb.get_resource("parameters/peptides/Blanco2021.json")
+            pmb.load_interaction_parameters(par_path)
 
     # Load ref data    
     if fig_label == "7a":
@@ -208,15 +190,15 @@
 
         pmb.define_protein (name=protein_pdb, 
                             topology_dict=topology_dict, 
-                            model = '2beadAA')
-
+                            model = '2beadAA',
+                            verbose= False)
+
         pH_range_HH = np.linspace(2, 7, num=1000)
 
         Z_HH = pmb.calculate_HH(molecule_name=protein_pdb,
                                 pH_list=pH_range_HH)
 
-        print (Z_HH)
-         # Plot HH
+        # Plot HH
         plt.plot(pH_range_HH,
                Z_HH, 
                label=r"HH", 
@@ -313,11 +295,8 @@
         plt.xticks([2,4,6,8,10,12])
 
     elif fig_label in labels_fig8:   
-
         data=data.astype({("pH","value"): 'float'}).sort_values(by=("pH","value"))
         data=data[data.pdb.value == f'{protein_pdb}']
-
-
         plt.errorbar(data["pH"]["value"], 
                    data["mean","charge"], 
                    yerr=data["err_mean","charge"], 

diff --git a/samples/Beyer2024/globular_protein.py b/samples/Beyer2024/globular_protein.py
@@ -4,7 +4,7 @@
 import argparse
 import numpy as np
 import pandas as pd 
-
+from espressomd.io.writer import vtf
 # Create an instance of pyMBE library
 import pyMBE
 pmb = pyMBE.pymbe_library(SEED=42)
@@ -41,16 +41,6 @@
                     required= False, 
                     default=False,  
                     help='Activates the motion of the protein')
-parser.add_argument('--metal_ion_name', 
-                    type=str, 
-                    required= False, 
-                    default=None,  
-                    help='Name of the metal ion in the protein')
-parser.add_argument('--metal_ion_charge', 
-                    type=int, 
-                    required= False, 
-                    default=None,  
-                    help='Charge of the metal ion in the protein')
 
 parser.add_argument('--mode',
                     type=str,
@@ -62,14 +52,11 @@
                     required= False,
                     help='output directory')
 
-parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose")
-
+parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose",default=True)
 
 args = parser.parse_args ()
-
 mode=args.mode
 verbose=args.no_verbose
-
 protein_name = args.pdb
 pH_value = args.pH 
 
@@ -85,6 +72,8 @@
 Box_L = Box_V**(1./3.) 
 solvent_permitivity = 78.3
 epsilon = 1*pmb.units('reduced_energy')
+sigma = 1*pmb.units("reduced_length")
+ion_size = 0.4*pmb.units.nm
 
 #Simulation Parameters
  #  in LJ units of time
@@ -126,45 +115,37 @@
 path_to_cg=pmb.get_resource(args.path_to_cg)
 topology_dict = pmb.read_protein_vtf_in_df (filename=path_to_cg)
 #Defines the protein in the pmb.df
-pmb.define_protein (name=protein_name, topology_dict=topology_dict, model = '2beadAA')
-
-#Create dictionary with the value of epsilon and sigma for each residue
-clean_sequence = pmb.df.loc[pmb.df['name']== protein_name].sequence.values[0]
-
-epsilon_dict = {}
-
-for residue in clean_sequence:
-    if residue not in epsilon_dict.keys():
-        epsilon_dict [residue] = epsilon
-    epsilon_dict  ['CA'] = epsilon
-
-#Define epsilon for each particle into pmb.df
-pmb.define_particles_parameter_from_dict (param_dict = epsilon_dict,
-                                            param_name ='epsilon')
-
-#Defines the metal ion present in the protein 
-if args.metal_ion_name is not None:
-    pmb.define_particle(name = args.metal_ion_name, 
-                        q=args.metal_ion_charge, 
-                        sigma=0.355*pmb.units.nm, 
-                        epsilon=epsilon)
+pmb.define_protein (name=protein_name, 
+                    topology_dict=topology_dict, 
+                    model = '2beadAA',
+                    lj_setup_mode = "wca")
 
 # Here we define the solution particles in the pmb.df 
 cation_name = 'Na'
 anion_name = 'Cl'
 
-pmb.define_particle(name = cation_name, q = 1, sigma=0.4*pmb.units.nm, epsilon=epsilon)
-pmb.define_particle(name = anion_name,  q =-1, sigma=0.4*pmb.units.nm, epsilon=epsilon)
+pmb.define_particle(name = cation_name, 
+                    q = 1, 
+                    sigma=sigma, 
+                    epsilon=epsilon,
+                    offset=ion_size-sigma)
+
+pmb.define_particle(name = anion_name,  
+                    q =-1, 
+                    sigma=sigma, 
+                    epsilon=epsilon,
+                    offset=ion_size-sigma)
 
 # Here we upload the pka set from the reference_parameters folder
 path_to_pka=pmb.get_resource('parameters/pka_sets/Nozaki1967.json') 
-pmb.load_pka_set (filename=path_to_pka)
+pmb.load_pka_set(filename=path_to_pka)
 
 #We create the protein in espresso 
 pmb.create_protein(name=protein_name,
-                               number_of_proteins=1,
-                               espresso_system=espresso_system,
-                               topology_dict=topology_dict)
+                    number_of_proteins=1,
+                    espresso_system=espresso_system,
+                    topology_dict=topology_dict)
+
 #Here we activate the motion of the protein 
 if args.move_protein:
     pmb.enable_motion_of_rigid_object(espresso_system=espresso_system,
@@ -181,10 +162,10 @@
                         anion_name=anion_name,
                         espresso_system=espresso_system)
 
-c_salt_calculated = pmb.create_added_salt (espresso_system=espresso_system,
-                                                    cation_name=cation_name,
-                                                    anion_name=anion_name,
-                                                    c_salt=c_salt)
+c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system,
+                                          cation_name=cation_name,
+                                          anion_name=anion_name,
+                                          c_salt=c_salt)
 
 #Here we calculated the ionisible groups 
 basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list()
@@ -218,17 +199,16 @@
 if WCA:
     pmb.setup_lj_interactions (espresso_system=espresso_system)
     minimize_espresso_system_energy (espresso_system=espresso_system)
-
     if Electrostatics:
-
         setup_electrostatic_interactions (units=pmb.units,
                                         espresso_system=espresso_system,
                                         kT=pmb.kT)
 
 #Save the initial state 
 n_frame = 0
-pmb.write_output_vtf_file(espresso_system=espresso_system,
-                        filename=f"frames/trajectory{n_frame}.vtf")
+with open('frames/trajectory'+str(n_frame)+'.vtf', mode='w+t') as coordinates:
+    vtf.writevsf(espresso_system, coordinates)
+    vtf.writevcf(espresso_system, coordinates)
 
 setup_langevin_dynamics (espresso_system=espresso_system, 
                                     kT = pmb.kT, 
@@ -238,7 +218,6 @@
 time_step = []
 net_charge_list = []
 
-
 Z_sim=[]
 particle_id_list = pmb.df.loc[~pmb.df['molecule_id'].isna()].particle_id.dropna().to_list()
 
@@ -288,8 +267,9 @@
 
     if step % stride_traj == 0 :
         n_frame +=1
-        pmb.write_output_vtf_file(espresso_system=espresso_system,
-                                    filename=f"frames/trajectory{n_frame}.vtf")
+        with open('frames/trajectory'+str(n_frame)+'.vtf', mode='w+t') as coordinates:
+            vtf.writevsf(espresso_system, coordinates)
+            vtf.writevcf(espresso_system, coordinates)
 
     # Store observables
     time_series["time"].append(espresso_system.time)