# Analyzing the output of a polychrom simulation

Running example.py generates several output files: one with the starting conformation, one with the initial arguments, one with the applied forces, one with the forcekit, and finally the blocks. Here, we ran 10 blocks of 100 timesteps each and set the max data length to 5, so each blocks file holds 5 conformations (`blocks_0-4.h5` and `blocks_5-9.h5`).

In [2]:
ls trajectory/

applied_forces_0.h5  blocks_5-9.h5                 initArgs_0.h5
blocks_0-4.h5        forcekit_polymer_chains_0.h5  starting_conformation_0.h5




In [1]:
import pandas as pd

In [4]:
#Here are the import statements sameer used in his notebook to analyze a simulation
import os
import importlib as imp
from collections import defaultdict
import h5py
import json
from copy import deepcopy

import matplotlib as mpl
import matplotlib.pyplot as plt

import numpy as np
import numpy.ma as ma
import pandas as pd
import scipy
from scipy.ndimage import gaussian_filter1d
from scipy.interpolate import interp1d, interp2d

import polychrom
from polychrom import polymer_analyses, contactmaps, polymerutils
from polychrom.hdf5_format import list_URIs, load_URI

import nglutils as ngu
import nglview as nv

In [5]:
%matplotlib notebook

## Reading data

`list_URIs` lists every conformation saved in the files whose names start with `blocks`, returning one URI (uniform resource identifier; here the file path followed by `::` and the block index) per conformation.

In [6]:
# Collect one URI per saved conformation found in the 'trajectory' directory.
trajs = list_URIs('trajectory')
print(trajs)

['trajectory/blocks_0-4.h5::0', 'trajectory/blocks_0-4.h5::1', 'trajectory/blocks_0-4.h5::2', 'trajectory/blocks_0-4.h5::3', 'trajectory/blocks_0-4.h5::4', 'trajectory/blocks_5-9.h5::5', 'trajectory/blocks_5-9.h5::6', 'trajectory/blocks_5-9.h5::7', 'trajectory/blocks_5-9.h5::8', 'trajectory/blocks_5-9.h5::9']


In [4]:
#TODO: figure out how to extract initial parameters from other h5 files

In [7]:
# Load the conformation referenced by the first URI in `trajs`.
data = load_URI(trajs[0])

In [8]:
data

{'pos': array([[-0.47, -1.51, -0.37],
        [-0.45, -1.95,  0.55],
        [ 0.45, -2.32,  0.71],
        ...,
        [ 2.05, -1.76,  0.76],
        [ 1.21, -1.56,  0.26],
        [ 0.5 , -1.88, -0.22]], dtype=float32),
 'block': 0,
 'kineticEnergy': 1.8880411331282307,
 'potentialEnergy': 2.5845410077802025,
 'time': 6.356380507349968}

In [9]:
ls trajectory

applied_forces_0.h5  blocks_5-9.h5                 initArgs_0.h5
blocks_0-4.h5        forcekit_polymer_chains_0.h5  starting_conformation_0.h5


In [15]:
# Read the `N` attribute stored in the initArgs file
# (presumably the number of monomers in the simulation -- confirm).
with h5py.File("trajectory/initArgs_0.h5", 'r') as f:
    N = f.attrs['N']
N

10000

In [1]:
ls simulations/test_integrator

applied_forces_0.h5  blocks_400-499.h5  blocks_900-999.h5
blocks_0-99.h5       blocks_500-599.h5  forcekit_polymer_chains_0.h5
blocks_100-199.h5    blocks_600-699.h5  initArgs_0.h5
blocks_200-299.h5    blocks_700-799.h5  starting_conformation_0.h5
blocks_300-399.h5    blocks_800-899.h5


In [11]:
# List conformation URIs for the run stored in 'simulations/test_integrator'.
# NOTE: this rebinds `trajs`, replacing the list built from 'trajectory'.
trajs = list_URIs('simulations/test_integrator')
print(trajs)

['simulations/test_integrator/blocks_0-99.h5::0', 'simulations/test_integrator/blocks_0-99.h5::1', 'simulations/test_integrator/blocks_0-99.h5::2', 'simulations/test_integrator/blocks_0-99.h5::3', 'simulations/test_integrator/blocks_0-99.h5::4', 'simulations/test_integrator/blocks_0-99.h5::5', 'simulations/test_integrator/blocks_0-99.h5::6', 'simulations/test_integrator/blocks_0-99.h5::7', 'simulations/test_integrator/blocks_0-99.h5::8', 'simulations/test_integrator/blocks_0-99.h5::9', 'simulations/test_integrator/blocks_0-99.h5::10', 'simulations/test_integrator/blocks_0-99.h5::11', 'simulations/test_integrator/blocks_0-99.h5::12', 'simulations/test_integrator/blocks_0-99.h5::13', 'simulations/test_integrator/blocks_0-99.h5::14', 'simulations/test_integrator/blocks_0-99.h5::15', 'simulations/test_integrator/blocks_0-99.h5::16', 'simulations/test_integrator/blocks_0-99.h5::17', 'simulations/test_integrator/blocks_0-99.h5::18', 'simulations/test_integrator/blocks_0-99.h5::19', 'simulati

## Computing P(s) scaling as a function of simulation time

In [16]:
from cooltools.lib import numutils
from pathlib import Path

basepath ='simulations/test_integrator'

In [23]:
# Read the `N` attribute from this run's initArgs file.
with h5py.File(Path(basepath)/"initArgs_0.h5", 'r') as f:
    N = f.attrs['N']
# np.arange(0, N+1, N) evaluates to [0, N]: the start/end indices of a single
# chain spanning positions 0..N-1.
chains = np.arange(0, N+1, N)

# NOTE(review): the meaning/units of the run parameters below are not visible
# in this notebook; presumably they mirror the simulation script -- confirm.
T_steps = 1000
integrations_per_save = 100
T_blocks = T_steps

col_rate = 2.0
density = 0.224
Nmd = 10000

# Label string built from `density` and `col_rate`.
polymer_state = f'density{density:.3f}_col-rate{col_rate:.2f}'
# NOTE(review): `basepath` is already read at the top of this cell, so it must
# be defined before the cell runs; this line re-assigns the same path.
basepath = 'simulations/test_integrator'

In [18]:
# Integer bin edges between 1 and N, spaced roughly geometrically (ratio 1.25),
# with 0 prepended.
# NOTE(review): `_logbins_numba` has a leading underscore, i.e. it is a private
# helper by convention; a public equivalent may exist in cooltools -- confirm.
bin_edges = numutils._logbins_numba(1, N, ratio=1.25, prepend_zero=True)
bin_edges

array([  0,   1,   2,   3,   4,   5,   7,   9,  11,  14,  18,  23,  30,
        38,  48,  62,  78, 100])

In [22]:
# Block-index boundaries: start at 5% of T_blocks and space the boundaries
# roughly geometrically (ratio `block_ratio`) up to T_blocks.
start = int(0.05*T_blocks)
end = T_blocks
block_ratio = 1.25
blocks = numutils._logbins_numba(start, end, ratio=block_ratio, prepend_zero=False)
# NOTE(review): presumably the contact cutoff radius; Ps_sorter's `cutoff`
# defaults to 1.1, so this value only applies if it is passed explicitly.
cutoff_rad = 1.0
blocks

array([  50,   64,   82,  106,  136,  174,  224,  287,  368,  473,  607,
        779, 1000])

In [13]:
def Ps_sorter(blocks, bin_edges, chains, cutoff=1.1):
    """Build a worker that computes the contact scaling of one conformation.

    Parameters
    ----------
    blocks : array-like of int
        Sorted block-index boundaries. A conformation with block index ``idx``
        is assigned to chunk ``np.searchsorted(blocks, idx, side='right')``,
        i.e. the number of boundaries that are <= ``idx``.
    bin_edges : array-like of int
        Bin edges passed to ``polymer_analyses.contact_scaling`` as ``bins0``.
    chains : array-like of int
        Chain boundaries; consecutive pairs ``(chains[i], chains[i+1])`` give
        the half-open row range of each chain in the position array.
    cutoff : float, optional
        Contact cutoff passed to ``polymer_analyses.contact_scaling``.

    Returns
    -------
    callable
        ``process(uri)``, which returns a one-row ``pandas.DataFrame`` with
        columns ``'chunk'`` and ``'Ps'``. ``'Ps'`` holds a ``(bins, contacts)``
        tuple where ``contacts`` is the sum over all chains of the values
        returned by ``contact_scaling`` (summed, not averaged).
    """

    def process(uri):
        """Compute the chunk index and summed contact scaling for ``uri``."""
        # URIs look like '<path>::<block index>'.
        idx = int(uri.split('::')[-1])
        data = load_URI(uri)['pos']

        chunk = np.searchsorted(blocks, idx, side='right')

        bins = None
        contacts = None
        for st, end in zip(chains[:-1], chains[1:]):
            conf = data[st:end, :]
            x, y = polymer_analyses.contact_scaling(conf, bins0=bin_edges, cutoff=cutoff)
            if bins is None:
                # The same bin_edges are used for every chain, so the bins
                # from the first chain are kept for all of them.
                bins = x
            if contacts is None:
                contacts = y
            else:
                # Accumulate into `contacts`. The original assigned the sum to
                # a misspelled name, dropping every chain after the first.
                contacts = contacts + y

        return pd.DataFrame({'chunk': [chunk], 'Ps': [(bins, contacts)]})

    return process