Skip to content

Commit

Permalink
analysis tests (#62)
Browse files Browse the repository at this point in the history
* test_get_dt first draft

* make pylint happy

* deprecate unused methods in analysis

* solved bug in get_dt for dataframes with repeated entries, improve coverage of the same function

* adopt new formatting for filenames, add unit testing for get_params_from_file_name

* solve code quality issues

* add unit tests for block_analyze, deprecate unused method

* remove dependency on deprecated function

* add missing testing data

* Add unit tests for all analysis functions

* shorten the time series for testing

* add missing file, fix format of the filename

* sort columns before testing

* ignore different types

* Add license to new unit test

* docstrings: fix format inconsistencies and variable type ambiguities

* fix some remaining formatting issues

---------

Co-authored-by: blancoapa <pablb@ntnu.no>
  • Loading branch information
kosovan and pm-blanco committed Jun 7, 2024
1 parent 2496835 commit 5e98340
Show file tree
Hide file tree
Showing 13 changed files with 3,448 additions and 413 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ unit_tests:
${PYTHON} testsuite/calculate_net_charge_unit_test.py
${PYTHON} testsuite/setup_salt_ions_unit_tests.py
${PYTHON} testsuite/globular_protein_unit_tests.py
${PYTHON} testsuite/analysis_tests.py

functional_tests:
${PYTHON} testsuite/cph_ideal_tests.py
Expand Down
421 changes: 140 additions & 281 deletions lib/analysis.py

Large diffs are not rendered by default.

252 changes: 126 additions & 126 deletions pyMBE.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion samples/Beyer2024/globular_protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
pH_value = args.pH

inputs={"pH": args.pH,
"protein_pdb": args.pdb}
"pdb": args.pdb}

#System Parameters
LANGEVIN_SEED = 77
Expand Down
155 changes: 155 additions & 0 deletions testsuite/analysis_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#
# Copyright (C) 2024 pyMBE-dev team
#
# This file is part of pyMBE.
#
# pyMBE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# pyMBE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest as ut
import pandas as pd
import lib.analysis as ana


class Serialization(ut.TestCase):
    """
    Unit tests for the analysis helpers exposed by ``lib.analysis``.

    Input and reference data are read from ``testsuite/tests_data`` through
    paths relative to the current working directory.
    """

    def test_analyze_time_series(self):
        """
        Compare the output of ``analyze_time_series`` for the test data folder
        with the reference stored in ``average_data.csv``.
        """
        print("*** Unit test: test that analysis.analyze_time_series analyzes all data in a folder correctly ***")
        analyzed_data = ana.analyze_time_series(path_to_datafolder="testsuite/tests_data",
                                                filename_extension="_time_series.csv",
                                                minus_separator=True)
        analyzed_data[["Dens","eps"]] = analyzed_data[["Dens","eps"]].apply(pd.to_numeric)
        reference_data = pd.read_csv("testsuite/tests_data/average_data.csv", header=[0,1])
        # NOTE(review): assigning to `.columns` only replaces the labels, it does
        # not reorder the underlying data. Confirm that relabelling (rather than
        # `df = df.sort_index(axis=1, ...)`) is what is intended here.
        analyzed_data.columns = analyzed_data.sort_index(axis=1,level=[0,1],ascending=[True,True]).columns
        reference_data.columns = reference_data.sort_index(axis=1,level=[0,1],ascending=[True,True]).columns
        pd.testing.assert_frame_equal(analyzed_data.dropna(),reference_data.dropna(), check_column_type=False, check_dtype=False)
        print("*** Unit passed ***")

    def test_get_dt(self):
        """
        Check the time step and warning count returned by ``get_dt`` and the
        ``ValueError`` raised for a missing time column or uneven time spacing.
        """
        print("*** Unit test: test that analysis.get_dt returns the right time step ***")
        data = pd.DataFrame.from_dict( {'time': [0, 1, 2,], 'obs': ['1.0', '2.0', '4.0']} )
        dt, n_warnings = ana.get_dt(data)
        self.assertAlmostEqual(dt, 1.0, delta = 1e-7)
        self.assertEqual(n_warnings, 0)
        print("*** Unit passed ***")

        print("*** Unit test: test that analysis.get_dt prints a warning if there are values with repeated time steps ***")
        data = pd.DataFrame.from_dict( {'time': [0, 1, 1,], 'obs': ['1.0', '2.0', '4.0']} )
        dt, n_warnings = ana.get_dt(data,verbose=True)
        self.assertAlmostEqual(dt, 1.0, delta = 1e-7)
        self.assertEqual(n_warnings, 1)
        print("*** Unit passed ***")

        print("*** Unit test: test that analysis.get_dt raises a ValueError if the column with the time is not found ***")
        data = pd.DataFrame.from_dict( {'ns': [0, 1, 2,], 'obs': ['1.0', '2.0', '4.0']} )
        with self.assertRaises(ValueError):
            ana.get_dt(data=data)
        print("*** Unit passed ***")

        print("*** Unit test: test that analysis.get_dt raises a ValueError if the time is stored at uneven intervals ***")
        data = pd.DataFrame.from_dict( {'time': [0, 1, 4,], 'obs': ['1.0', '2.0', '4.0']} )
        with self.assertRaises(ValueError):
            ana.get_dt(data=data)
        print("*** Unit passed ***")

    def test_add_data_to_df(self):
        """
        Check that ``add_data_to_df`` creates a dataframe from a dictionary and
        that it extends an existing dataframe with a new row.
        """
        print("*** Unit test: test that analysis.add_data_to_df creates a Pandas Dataframe from a dictionary correctly ***")
        data = {'A': [2],
                'B': ['1.0']}
        reference_df = pd.DataFrame(data,
                                    index=[0])
        analysis_df = ana.add_data_to_df(df=pd.DataFrame(),
                                         data_dict=data,
                                         index=[0])
        pd.testing.assert_frame_equal(reference_df,analysis_df)
        print("*** Unit passed ***")

        print("*** Unit test: test that analysis.add_data_to_df updates a Pandas Dataframe with new data from dictionary correctly ***")
        data ["C"] = False
        reference_df = pd.concat([reference_df, pd.DataFrame(data,index=[len(analysis_df)])])
        analysis_df = ana.add_data_to_df(df=analysis_df,
                                         data_dict=data,
                                         index=[len(analysis_df)])
        # The updated dataframe was previously never compared with the
        # reference, so this sub-test could not fail.
        pd.testing.assert_frame_equal(reference_df,analysis_df)
        print("*** Unit passed ***")

    def test_get_params_from_file_name(self):
        """
        Check the parameters parsed by ``get_params_from_file_name`` for
        several filename formats and the ``ValueError`` raised for a
        wrongly formatted filename.
        """
        print("*** Unit test: test that get_params_from_file_name parses a filename without minus separator ***")
        filename = 'density_0.001_N_1000_T_2.00.csv'
        correct_params = {'density': '0.001', 'N': '1000', 'T': '2.00'}
        params = ana.get_params_from_file_name(filename,
                                               minus_separator = False)
        self.assertEqual(correct_params,params)
        print("*** Unit passed ***")

        print("*** Unit test: test that get_params_from_file_name parses a filename with minus separator ***")
        filename = 'N-064_Solvent-good_Init-coil_time_series.csv'
        correct_params = {'N': 64, 'Solvent': 'good', 'Init': 'coil'}
        params = ana.get_params_from_file_name(filename,
                                               minus_separator = True,
                                               filename_extension="_time_series.csv")
        self.assertEqual(correct_params,params)
        print("*** Unit passed ***")

        print("*** Unit test: test that get_params_from_file_name parses a filename with a different extension ***")
        filename = 'density_0.001_N_1000_T_2.00_time_series.txt'
        correct_params = {'density': '0.001', 'N': '1000', 'T': '2.00'}
        params = ana.get_params_from_file_name(filename,
                                               minus_separator = False,
                                               filename_extension="_time_series.txt")
        self.assertEqual(correct_params,params)
        print("*** Unit passed ***")

        print("*** Unit test: test that get_params_from_file_name raises a ValueError if a filename with a wrong formating is provided ***")
        with self.assertRaises(ValueError):
            ana.get_params_from_file_name(file_name='density_0.001_N_1000_T_f_2.00_time_series.txt',
                                          filename_extension="_time_series.txt")
        print("*** Unit passed ***")

    def test_block_analyze(self):
        """
        Compare the output of ``block_analyze`` (all columns and the ``Rg``
        column only) with the stored reference, and check the ``ValueError``
        raised when the input has no time column.
        """
        print("*** Unit test: test that block_analyze yields the expected outputs and reports the number of blocks and the block size. It should print that it encountered 1 repeated time value. ***")
        data = pd.read_csv("testsuite/tests_data/N-064_Solvent-good_Init-coil_time_series.csv")
        analyzed_data = ana.block_analyze(full_data=data, verbose=True)
        analyzed_data = ana.add_data_to_df(df=pd.DataFrame(),
                                           data_dict=analyzed_data.to_dict(),
                                           index=[0])
        reference_data = pd.read_csv("testsuite/tests_data/N-064_Solvent-good_Init-coil_time_series_analyzed.csv", header=[0,1])
        pd.testing.assert_frame_equal(analyzed_data.dropna(),reference_data.dropna(), check_column_type=False)
        print("*** Unit passed ***")

        print("*** Unit test: test that block_analyze analyzes correcly a subset of the data ***")
        analyzed_data = ana.block_analyze(full_data=data, columns_to_analyze="Rg")
        analyzed_data = ana.add_data_to_df(df=pd.DataFrame(),
                                           data_dict=analyzed_data.to_dict(),
                                           index=[0])
        reference_data = pd.read_csv("testsuite/tests_data/N-064_Solvent-good_Init-coil_time_series_analyzed.csv", header=[0,1])
        # Keep only the reference columns that belong to the analyzed observable.
        reference_data = reference_data[[("mean","Rg"),("err_mean","Rg"),("n_eff","Rg"),("tau_int","Rg")]]
        pd.testing.assert_frame_equal(analyzed_data.dropna(),reference_data.dropna(), check_column_type=False)
        print("*** Unit passed ***")

        print("*** Unit test: test that block_analyze raises a ValueError if there is no time column ***")
        data = pd.DataFrame.from_dict( {'ns': [0, 1, 2,], 'obs': ['1.0', '2.0', '4.0']} )
        with self.assertRaises(ValueError):
            ana.block_analyze(full_data=data, verbose=False, dt=1)
        print("*** Unit passed ***")


if __name__ == "__main__":
    # Announce the suite, then hand control to the unittest runner.
    banner = "*** lib.analysis unit tests ***"
    print(banner)
    ut.main()
4 changes: 3 additions & 1 deletion testsuite/gcmc_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@ def gcmc_test(script_path, mode):
print(subprocess.list2cmdline(run_command))
subprocess.check_output(run_command)
# Analyze all time series
data=analysis.analyze_time_series(path_to_datafolder=time_series_path)
data=analysis.analyze_time_series(path_to_datafolder=time_series_path,
filename_extension="_time_series.csv")

print(data["csalt","value"])
# Check concentration
test_concentration=np.sort(data["csalt","value"].to_numpy(dtype=float))
ref_concentration=np.sort(data["mean","c_salt"].to_numpy())
Expand Down
3 changes: 2 additions & 1 deletion testsuite/globular_protein_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ def run_protein_test(script_path, test_pH_values, protein_pdb, rtol, atol,mode="
print(subprocess.list2cmdline(run_command))
subprocess.check_output(run_command)
# Analyze all time series
data=analysis.analyze_time_series(path_to_datafolder=time_series_path)
data=analysis.analyze_time_series(path_to_datafolder=time_series_path,
filename_extension="_time_series.csv")

data_path=pmb.get_resource(path="testsuite/globular_protein_tests_data")

Expand Down
4 changes: 2 additions & 2 deletions testsuite/serialization_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def test_json_encoder(self):
def test_parameters_to_path(self):
params = {"kT": 2., "phi": -np.pi, "n": 3, "fene": True, "name": "pep"}
name = lib.analysis.built_output_name(params)
self.assertEqual(name, "kT-2_phi--3.14_n-3_fene-True_name-pep")
params_out = lib.analysis.get_params_from_dir_name(name)
self.assertEqual(name, "kT_2_phi_-3.14_n_3_fene_True_name_pep")
params_out = lib.analysis.get_params_from_file_name(name)
params_ref = {"kT": "2", "phi": "-3.14", "n": "3",
"fene": "True", "name": "pep"}
self.assertEqual(params_out, params_ref)
Expand Down
Loading

0 comments on commit 5e98340

Please sign in to comment.