# Load necessary packages and iscam_analysis software

In [None]:
# Select the ipympl notebook backend; this is done before pyplot is imported
import matplotlib
matplotlib.use('module://ipympl.backend_nbagg')
from matplotlib import pyplot as plt
# Switch pyplot to interactive mode
plt.ion()

import pickle
import sys

# Make the iscam_analysis directory importable, then pull all of its
# public names into the notebook namespace
sys.path.append('../iscam_analysis/')
from iscam_analysis import *

# Load iSCAM data into a dataset

In [None]:
# Parameters for reading a list of iscam intensity files that are
# going to be combined into one dataset

# Path where the data files are stored
path = '../example_data'
# Pattern the files to be loaded have to match. A '*' can be any chain
# of characters; '*.h5' will therefore match all files ending with '.h5'.
pattern = '*fitted*.h5'
# Load files which obey the pattern recursively
recursive = True
# Calibration used to convert between intensities and molecular
# weight in kDa, given as [slope, intercept]
mw_intensity_line_pars = [2.59682927e-05, 6.35853659e-05]
# Meaningful key/name for the dataset to be created
key = 'NAME_OF_DATASET'

# Collect the iscam data files and create a dataset from them
filelist = get_iscam_h5py_filelist(
    path,
    pattern=pattern,
    recursive=recursive,
)
dataset = create_iscam_dataset(
    filelist,
    mw_intensity_line_pars=mw_intensity_line_pars,
    name=key,
)
# Show the 'info' entry of the freshly created dataset
dataset['info']

# Fit and plot iSCAM data of a dataset

In [None]:
# Set fitting and plotting parameters
# Set initial start values for gauss peak centers
centers_init = [90, 180, 360, 540, 720]
# Set the range (center +/- center_pm) a center can be varied during fitting
center_pm = 50  # kDa
# Set the initial full width at half maximum of the gaussians
fwhm = 50  # kDa
# Fit CDF or histogram
cdf = False
# Set the parameters of the linear function the sigmas of the gaussians
# should be constrained to depending on the centers [slope, intercept]:
# sigma = slope * center + intercept
# A value of None disables the constraint of sigma.
sigma_center_line_pars = None  # [slope, intercept]
# Set range of weight events to be used for fitting (min, max)
mw_range = (0, 1000)  # kDa
# Set number of bins the mw_range will be split into
bins = 100
# Use means of MWs of datapoints in bins
# instead of center of bins for fitting
use_x_means = False
# Use weights (uncertainties) of number of datapoints
# of the bins for fitting
use_y_weights = False
# Plot black lines at fitted centers
centers = True
# Plot CDF of protomers
cdf_protomers = True
# Plot CDF of monomers
cdf_monomers = False
# Plot CDF of fit
cdf_fit = False
# Plot individual gaussian components or the sum of all gaussians
components = True
# Select the range that should be plotted
plot_range = (0, 1000)  # kDa
# Set the molecular weight by which the xticks should be separated
mw = 25  # kDa
# Select the xticks that should have a MW label
labeled_xticks = [0, 20, 40]  # number of mws
# Set path to where the figure should be saved. See
# matplotlib documentation for supported file types.
# A value of None disables the saving of the figure.
figpath = '../hist_fit.png'

# Do the fitting and plotting
result, fig_ax = fit_and_plot_iscam(
    dataset,
    centers_init,
    center_pm=center_pm,
    fwhm=fwhm,
    cdf=cdf,
    sigma_center_line_pars=sigma_center_line_pars,
    mw_range=mw_range,
    bins=bins,
    use_x_means=use_x_means,
    use_y_weights=use_y_weights,
    centers=centers,
    cdf_protomers=cdf_protomers,
    cdf_monomers=cdf_monomers,
    cdf_fit=cdf_fit,
    components=components,
    plot_range=plot_range,
    mw=mw,
    labeled_xticks=labeled_xticks,
    figpath=figpath,
)

# Optional: convert fit result to more convenient dictionaries/numpy ndarrays
#fit_params = get_fit_params(result.params, verbose=True)
#fit_values = get_fit_values(fit_params)

# Retrieve a fit result from a dataset

In [None]:
# Pick the best fit result of the dataset (according to the Akaike criterion)
result_key, result_params, fit_params = get_best_result(dataset)

# Look up the stored entry of the best fit once and read the settings,
# fit_report, fit_params and result_params from it
best_result = dataset['results'][result_key]
settings = best_result['settings']
fit_report = best_result['fit_report']
fit_params = best_result['fit_params']
result_params = best_result['result_params']

# Print the fit report and the content of fit_params ...
print('FIT REPORT:\n', fit_report, '\n')
print('FIT PARAMS:\n', fit_params, '\n')
# ... and show result_params as the output of the cell
print('RESULT PARAMS:')
result_params

# Save a dataset in a dictionary

In [None]:
# Put the previously created dataset into a fresh dictionary
# under the key 'NAME_OF_DATASET'
key = 'NAME_OF_DATASET'
datasets = {key: dataset}

# Write the dictionary to disk as a pickle file
with open('./datasets.pkl', 'wb') as f:
    pickle.dump(datasets, f)

# Load a dataset from a dictionary

In [None]:
# Key of the dataset to be fetched from the dictionary
key = 'NAME_OF_DATASET'

# Read the dictionary back from disk.
# NOTE: unpickling can execute arbitrary code - only load pickle
# files that you created yourself or that come from a trusted source.
with open('./datasets.pkl', 'rb') as f:
    datasets = pickle.load(f)

# Get the dataset stored under the key
dataset = datasets[key]