In [108]:
from simulation import *
from aesthetics import *
from models import *
from data import *
import os as os
import re as re
import pandas as pd

%reload_ext autoreload
%autoreload 2
%matplotlib inline
mpl.rc('text', usetex=True)

# A model for flux prediction
- Reflect at 2*pi and compute JSD with reflection, then add the JSD value for each surface.
- Move by 1 bin and repeat.
- Note the lowest sum (for any bin shift) of JSDs. Call this J_symmetric.

- Compute the PDF on each surface (done)
- Compute the JDF between the PMFs. Call this J_bind.

- p_min: lowest probability means there is a high barrier.

Then: 
f = J_symmetric * J_bind * p_min.

In [109]:
def JSD(x, y): #Jensen-shannon divergence
    x = np.array(x)
    y = np.array(y)
    d1 = x * np.log2(2 * x / (x + y))
    d2 = y * np.log2(2 * y / (x + y))
    d1[np.isnan(d1)] = 0
    d2[np.isnan(d2)] = 0
    d = 0.5 * np.sum(d1 + d2)
    return d


In [110]:
def permute(x, shift):
    return np.roll(x, shift)

In [111]:
def reflect(x, point):
    return np.concatenate((x[point:], x[:point]))[::-1]

In [112]:
df = pd.DataFrame()
for file in range(len(unbound_files)):
    long_name = os.path.splitext(os.path.basename(unbound_files[file]))[0]
    short_name = re.search('^[^_]*', long_name).group(0)
    if short_name == 'chi1CYM185':
        pass
    else:
        data = np.genfromtxt(unbound_files[file])[1:]
        J_symmetric = 1.0
        for permutation in range(len(data)):
            J = JSD(data, reflect(data, permutation))
            if J < J_symmetric:
                J_symmetric = J
        df = df.append(pd.DataFrame({'Name': short_name,
                                     'J_symmetric': J_symmetric}, index=[0]), ignore_index=True)

In [113]:
df.head()

Unnamed: 0,J_symmetric,Name
0,0.133388,chi1ARG119
1,0.189839,chi1ARG120
2,0.042087,chi1ARG123
3,0.043733,chi1ARG130
4,0.021699,chi1ARG151


In [114]:
for file in range(len(unbound_files)):
    long_name = os.path.splitext(os.path.basename(unbound_files[file]))[0]
    short_name = re.search('^[^_]*', long_name).group(0)
    if short_name == 'chi1CYM185':
        pass
    else:
        unbound_data = np.genfromtxt(unbound_files[file])[1:]
        bound_data = np.genfromtxt(bound_files[file])[1:]
        J = JSD(unbound_data, bound_data)
        df.loc[file, 'J_bind'] = J

In [115]:
df.head()

Unnamed: 0,J_symmetric,Name,J_bind
0,0.133388,chi1ARG119,0.011806
1,0.189839,chi1ARG120,0.046672
2,0.042087,chi1ARG123,0.027365
3,0.043733,chi1ARG130,0.005905
4,0.021699,chi1ARG151,0.167753


In [116]:
for file in range(len(unbound_files)):
    long_name = os.path.splitext(os.path.basename(unbound_files[file]))[0]
    short_name = re.search('^[^_]*', long_name).group(0)
    if short_name == 'chi1CYM185':
        pass
    else:
        unbound_data = np.genfromtxt(unbound_files[file])[1:]
        bound_data = np.genfromtxt(bound_files[file])[1:]
        p_min = min(np.concatenate((unbound_data, bound_data)))
        df.loc[file, 'p_min'] = p_min

In [117]:
df.head()

Unnamed: 0,J_symmetric,Name,J_bind,p_min
0,0.133388,chi1ARG119,0.011806,0.0
1,0.189839,chi1ARG120,0.046672,0.0
2,0.042087,chi1ARG123,0.027365,0.0
3,0.043733,chi1ARG130,0.005905,0.0
4,0.021699,chi1ARG151,0.167753,0.0


In [118]:
df['Mike'] = df['J_bind'] * df['J_symmetric'] * df['p_min']

In [119]:
df.sort_values('Mike', ascending=False)

Unnamed: 0,J_symmetric,Name,J_bind,p_min,Mike
391,0.144513,chi2ILE121,0.030752,0.002373,1.054522e-05
1128,0.114767,psiGLY36,0.066656,0.000760,5.815523e-06
325,0.017022,chi2ASN99,0.077030,0.000398,5.221205e-07
319,0.145435,chi2ASN279,0.045090,0.000063,4.131116e-07
618,0.078463,chi3MET44,0.023699,0.000105,1.952350e-07
615,0.065905,chi3MET106,0.076319,0.000036,1.820795e-07
385,0.035407,chi2HIS144,0.020248,0.000145,1.038094e-07
320,0.013098,chi2ASN312,0.051165,0.000109,7.277921e-08
323,0.061373,chi2ASN76,0.003513,0.000315,6.791110e-08
1123,0.094341,psiGLY268,0.017366,0.000036,5.930758e-08
