In [None]:
import json
from plotter import TrajectoryPlotter
from trajectory import DataTrajectory
from analyse import MultiTrajectoryAnalyser, SingleTrajectoryAnalyser
from utils.param_key import *

# 1. Define the file names and paths for your data
To run the experiments, the path and filenames of the data have to be set.
The following four parameters are necessary:
1. **trajectory_name** = Name of the trajectory (used for plot titles)
2. **filename_list** = The list of filenames of the trajectories (e.g. .xtc/.dcd- files)
3. **topology_filename**: .pdb-file of the trajectory
4. **folder_path** (e.g. data/protein-name): path to the folder, where the trajectory files are (make sure to have the pdb data and the trajectories in the same folder)


In [None]:
def get_files_and_kwargs(params):
    """
    Look up the trajectory files and loader keyword arguments of one data set.

    The data set is selected by ``params[TRAJECTORY_NAME]`` and the file to load
    first by the index ``params[FILE_ELEMENT]`` into that data set's file list.

    :param params: run parameters; must contain TRAJECTORY_NAME and FILE_ELEMENT
    :return: tuple ``(remaining_filenames, file_kwargs)`` - the file list of the
        data set with the selected file removed, and a dict with the keys
        'filename', 'topology_filename', 'folder_path', 'atoms' (only for
        '2wav') and 'params'
    :raises ValueError: if the trajectory name matches none of the known data sets
    """
    wanted_name = params[TRAJECTORY_NAME]
    wanted_index = params[FILE_ELEMENT]

    # One entry per data set:
    # (name, all trajectory files, topology file, data folder, extra loader kwargs)
    data_sets = (
        ('2f4k',
         [f'2F4K-0-protein-{i:03d}.dcd' for i in range(0, 62 + 1)] + ['tr3_unfolded.xtc', 'tr8_folded.xtc'],
         '2f4k.pdb', 'data/2f4k', {}),
        ('prot2',
         ['prod_r1_nojump_prot.xtc', 'prod_r2_nojump_prot.xtc', 'prod_r3_nojump_prot.xtc'],
         'prod_r1_pbc_fit_prot_last.pdb', 'data/ProtNo2', {}),
        ('savinase',
         ['savinase_1.xtc', 'savinase_2.xtc'],
         'savinase.pdb', 'data/Savinase', {}),
        ('2wav',
         [f'2WAV-0-protein-{i:03d}.dcd' for i in range(0, 136)],
         '2wav.pdb', 'data/2WAV-0-protein', {'atoms': list(range(710))}),
        ('5i6x',
         ['protein.xtc', 'system.xtc'],
         '5i6x.pdb', 'data/ser-tr', {}),
        ('fs-peptide',
         [f'trajectory-{i}.xtc' for i in range(1, 28 + 1)],
         'fs-peptide.pdb', 'data/fs-peptide', {}),
    )

    # Take the first entry whose name equals the requested one.
    for name, all_files, topology, folder, extra_kwargs in data_sets:
        if wanted_name == name:
            break
    else:
        raise ValueError(f'No data trajectory was found with the name `{wanted_name}`.')

    file_kwargs = {'filename': all_files[wanted_index],
                   'topology_filename': topology,
                   'folder_path': folder,
                   **extra_kwargs,
                   'params': params}
    # The selected file is handed over via file_kwargs, so only the others remain in the list.
    all_files.pop(wanted_index)
    return all_files, file_kwargs

# Initialize Model Parameters
To run the program, models with different parameters can be used and trained.
In this cell, the different parameters of the models can be set.
However, it is **recommended to use a *.json* file** instead and to set the config parameters there.
In this cell the different parameters are explained, this includes important and optional parameters.
Note: The different parameters in upper-case can be imported `from utils.param_key import *`,
although the string values of the parameters are written in lower-case and should be used in the *.json*-config-files.

### Algorithms
The main algorithms and their parameter settings are listed below:
1. PCA
    - `{ALGORITHM_NAME: 'original_pca', NDIM: MATRIX_NDIM}` or
    - `{ALGORITHM_NAME: 'pca', NDIM: TENSOR_NDIM, USE_STD: False, ABS_EVAL_SORT: False}`
2. TICA
    - `{ALGORITHM_NAME: 'original_tica', NDIM: MATRIX_NDIM}` or
    - `{ALGORITHM_NAME: 'tica', LAG_TIME: params[LAG_TIME], NDIM: MATRIX_NDIM, USE_STD: False, ABS_EVAL_SORT: False}`
3. raw MATRIX models
    - `{ALGORITHM_NAME: 'pca', NDIM: MATRIX_NDIM}`
    - `{ALGORITHM_NAME: 'tica', NDIM: MATRIX_NDIM, LAG_TIME: params[LAG_TIME]}`

4. raw TENSOR models
    - `{ALGORITHM_NAME: 'pca', NDIM: TENSOR_NDIM}`
    - `{ALGORITHM_NAME: 'tica', NDIM: TENSOR_NDIM, LAG_TIME: params[LAG_TIME]}`

## Parameters
### Required Parameters
These parameters are **mandatory** for a correct program run!
1. ALGORITHM_NAME:
    - 'pca'
    - 'tica'
    - ('original_pca')
    - ('original_tica')
2. NDIM:
    - MATRIX_NDIM (=2)
    - TENSOR_NDIM (=3)

### Optional Parameters
The different parameters and their different options are listed here below.
1. KERNEL (Choose how to map the kernel-matrix onto the covariance matrix,
*default: kernel-matrix not used*)
    - KERNEL_ONLY
    - KERNEL_DIFFERENCE
    - KERNEL_MULTIPLICATION
2. KERNEL_TYPE (Choose the kernel-function which should be fitted on the covariance matrix,
*default (if kernel set): MY_GAUSSIAN*)
    - MY_GAUSSIAN
    - MY_EXPONENTIAL
    - MY_LINEAR
    - MY_EPANECHNIKOV
    - GAUSSIAN, EXPONENTIAL, LINEAR, EPANECHNIKOV (only with interval 1 fitting)
3. COV_FUNCTION (Choose how to calculate the Covariance-Matrix for the algorithm,
*default: np.cov*)
    - np.cov
    - np.corrcoef
    - utils.matrix_tools.co_mad
4. NTH_EIGENVECTOR (Set this parameter to *>1*, if you want to use the Eigenvalue Selection Approach
with every *n*-th eigenvector [Not recommended], *default: 1*)
    - [int]
5. LAG_TIME (Set this parameter in connection with ALGORITHM_NAME: 'tica', *default: 0*)
    - [int]

### Boolean Parameters
1. CORR_KERNEL (Set this parameter in connection with ALGORITHM_NAME 'tica'.
If *True*, then the fitted kernel-matrix is also mapped on the correlation matrix [Not recommended], *default: False*)
2. ONES_ON_KERNEL_DIAG (This parameter is useful, to force the diagonally dominant matrix properties in some cases,
*default: False*)
3. USE_STD (An additional standardizing preprocessing step can be used within the algorithm,
*default: True*)
4. CENTER_OVER_TIME (An additional standardizing preprocessing step can be used within the algorithm,
*default: True*)
5. EXTRA_DR_LAYER (Set this parameter to True, if you want to use the Eigenvalue Selection approach with a second layer
[Not Recommended], *default: False*)

In [None]:
def get_model_params_list(alg_json_file, params):
    """
    Return the list of model-parameter dictionaries to use.

    :param alg_json_file: path of a JSON file containing the model parameters
        (recommended), or None to use the list defined inline in this function
    :param params: run parameters; part of the interface but not read here
    :return: the parsed content of the JSON file if a path is given,
        otherwise the built-in list of model-parameter dictionaries
    :raises FileNotFoundError: if ``alg_json_file`` is given but does not exist
    """
    if alg_json_file is not None:
        # Recommended way. The context manager closes the file handle again,
        # also when the JSON content cannot be parsed.
        with open(alg_json_file) as json_file:
            return json.load(json_file)

    return [
        # Original Algorithms
        {ALGORITHM_NAME: 'original_pca', NDIM: MATRIX_NDIM},
        {ALGORITHM_NAME: 'original_tica', NDIM: MATRIX_NDIM},
        # ###   Insert your model parameters   ###
        {ALGORITHM_NAME: 'pca', NDIM: TENSOR_NDIM, KERNEL: KERNEL_ONLY, ANALYSE_PLOT_TYPE: PLOT_3D_MAP},
    ]

# Define parameters for different runs
In addition to the model parameters, the parameters for the trajectory are important.
They can be set in the cell below:

In [None]:
run_params_json = None  # NotYetImplemented
# Relative path: it is resolved against the working directory of the kernel.
alg_params_json = 'config_files/algorithm/algorithm_parameters_list.json'
run_params = {
    PLOT_TYPE: COLOR_MAP,  # 'heat_map', 'color_map', '3d_map', 'explained_var_plot'
    PLOT_TICS: True,  # True, False
    CARBON_ATOMS_ONLY: True,  # True, False
    INTERACTIVE: True,  # True, False
    N_COMPONENTS: None,
    BASIS_TRANSFORMATION: False,
    USE_ANGLES: False,
    TRAJECTORY_NAME: '2f4k',  # one of the data set names handled in get_files_and_kwargs
    FILE_ELEMENT: 0,  # index of the trajectory file to load first
}

filename_list, kwargs = get_files_and_kwargs(run_params)
try:
    model_params_list = get_model_params_list(alg_params_json, run_params)
except FileNotFoundError:
    # Without the config file the cell used to abort the whole notebook run.
    # Fall back to the parameter list defined inline in get_model_params_list
    # and tell the reader which list is in use.
    print(f'Config file `{alg_params_json}` not found, '
          'using the model parameters defined in `get_model_params_list` instead.')
    model_params_list = get_model_params_list(None, run_params)

FileNotFoundError: [Errno 2] No such file or directory: 'config_files/algorithm/algorithm_parameters_list.json'

# Plot the molecules with a slider to choose the time step of the plot

In [None]:
# Build the trajectory object from the file kwargs returned by get_files_and_kwargs.
tr = DataTrajectory(**kwargs)
# Show the trajectory data with a time-step slider; no explicit min/max is passed
# (how `min_max=None` is interpreted is defined in TrajectoryPlotter - TODO confirm).
TrajectoryPlotter(tr).data_with_timestep_slider(min_max=None)

# Compare models (Qualitative)
Plots the different models side by side, demonstrating the reduced dimensions
NOTE: qualitative plot works only for 2 components

In [None]:
# The side-by-side comparison is only available for exactly 2 components.
# For any other setting (including the None configured in `run_params`) the
# cell reports this and is skipped instead of raising, so that "Run All"
# still reaches the analysis cells that follow.
if run_params[N_COMPONENTS] == 2:
    tr = DataTrajectory(**kwargs)
    SingleTrajectoryAnalyser(tr).compare(model_params_list)
else:
    print('Skipped: Qualitative plot only works for 2 Components '
          f'(N_COMPONENTS is {run_params[N_COMPONENTS]!r}).')

# Analyse Multiple trajectories

In [None]:
# Loader kwargs for every trajectory of the data set: the selected file first,
# followed by one shallow copy per remaining file in which only 'filename' differs.
kwargs_list = [kwargs] + [
    {**kwargs, 'filename': other_filename}
    for other_filename in filename_list
]

## Compare Kernels

In [None]:
# Kernel functions whose fitting scores are compared.
kernel_names = [MY_GAUSSIAN, MY_EXPONENTIAL, MY_EPANECHNIKOV]
# Model parameters shared by all compared kernels; no KERNEL_TYPE is set here
# (presumably it is filled in per kernel by the analyser - TODO confirm).
model_params_alg_name_only = {ALGORITHM_NAME: 'pca', NDIM: TENSOR_NDIM, ANALYSE_PLOT_TYPE: KERNEL_COMPARE}
# The analyser receives the loader kwargs of all trajectories plus the run parameters.
mtr = MultiTrajectoryAnalyser(kwargs_list, run_params)
mtr.compare_kernel_fitting_scores(kernel_names, model_params_alg_name_only)