In [2]:
from simulation import *
from aesthetics import *
from models import *
from data import *
from analysis import *

import glob
import os as os
import re as re
import pandas as pd

%reload_ext autoreload
%autoreload 2
%matplotlib inline
mpl.rc('text', usetex=True)

# Scan through PKA using the techniques developed on ADK for interesting torsions.

In [19]:
# Build the list of torsion names for which apo PKA data files exist.
pka_dir = '../../md-data/pka-md-data/'
pka_unbound_files = sorted(glob.glob(pka_dir + 'apo/' + '*'))

# Torsions left out of the scan. Each entry is a regular expression handed to
# `re.search`, so a trailing `*` only makes the preceding character optional.
skipped_torsions = (
    'omega*',
    'chi3ASN*',
    'chi5LYS*',
    # This one is missing in the PKA data set. Unclear why.
    'chi1CYM185',
)

names = []
for path in pka_unbound_files:
    stem = os.path.splitext(os.path.basename(path))[0]
    # Keep only the part of the file name that precedes the first underscore.
    torsion = re.search('^[^_]*', stem).group(0)
    if any(re.search(pattern, torsion) for pattern in skipped_torsions):
        continue
    names.append(torsion)

In [20]:
# Scan the concentration exponent from -6 up to (but excluding) 0 in steps of
# 0.1 and tabulate the fluxes reported by `summarize_fluxes` for every torsion
# in `names`. The value passed on is 10**concentration; the exponent itself is
# what gets stored in the 'Concentration' column.
# NOTE(review): the following cell writes the same pickle with additional
# columns, so this cell looks superseded — confirm whether it is still needed.
calculation = True
df = pd.DataFrame()
if calculation:
    # Gather one record per (concentration, torsion) and build the frame once.
    # Appending to a DataFrame inside the loop copies the whole frame on every
    # iteration, and `DataFrame.append` no longer exists in pandas >= 2.0.
    records = []
    for concentration in np.arange(-6, 0, 0.1):
        for name in names:
            directional_flux, intersurface_flux, driven_flux = summarize_fluxes(
                name, concentration=10**concentration, data_source='pka_md_data')
            records.append({'Concentration': concentration,
                            'Directional flux': directional_flux,
                            'Driven flux': driven_flux,
                            'File': name,
                            # Trailing digits of the torsion name (kept as a string).
                            'ResID': re.match('.*?([0-9]+)$', name).group(1),
                            })
    df = pd.DataFrame(records)
    df.to_pickle('pka-concentration-scan.pickle')
else:
    # Reuse the results cached by an earlier run.
    df = pd.read_pickle('pka-concentration-scan.pickle')

In [None]:
# Scan the concentration exponent from -6 up to (but excluding) 0 in steps of
# 0.1 and, for every torsion in `names`, record the fluxes from
# `summarize_fluxes` together with the maximum power and load from
# `summarize_power_and_load`. The value passed on is 10**concentration; the
# exponent itself is what gets stored in the 'Concentration' column.
calculation = True
df = pd.DataFrame()
if calculation:
    # Gather one record per (concentration, torsion) and build the frame once.
    # Appending to a DataFrame inside the loop copies the whole frame on every
    # iteration, and `DataFrame.append` no longer exists in pandas >= 2.0.
    records = []
    for concentration in np.arange(-6, 0, 0.1):
        for name in names:
            directional_flux, intersurface_flux, driven_flux = summarize_fluxes(
                name, concentration=10**concentration, data_source='pka_md_data')
            # The power/load summary is told the sign of the directional flux.
            negative = bool(directional_flux < 0)
            max_power, max_load = summarize_power_and_load(
                name, concentration=10**concentration, negative=negative,
                debug=False, data_source='pka_md_data')
            records.append({'Concentration': concentration,
                            'Directional flux': directional_flux,
                            'Intersurface flux': intersurface_flux,
                            'Driven flux': driven_flux,
                            'File': name,
                            'Max power': max_power,
                            'Max load': max_load,
                            # Trailing digits of the torsion name (kept as a string).
                            'ResID': re.match('.*?([0-9]+)$', name).group(1),
                            })
    df = pd.DataFrame(records)

    df.to_pickle('pka-concentration-scan.pickle')
else:
    # Reuse the results cached by an earlier run.
    df = pd.read_pickle('pka-concentration-scan.pickle')

Let's write out the concentration counts file.

In [63]:
# Preview the first rows of the scan results held in `df`.
df.head()

Unnamed: 0,Concentration,Directional flux,Driven flux,File,Intersurface flux,Max load,Max power,ResID
0,-6.0,-0.000245,0.008517,chi1ARG119,0.00259,0.0,0.0,119
1,-6.0,0.000277,0.041746,chi1ARG120,0.01209,0.0,0.0,120
2,-6.0,0.000675,0.019187,chi1ARG123,0.005491,0.0,0.0,123
3,-6.0,-0.000972,0.00585,chi1ARG130,0.001295,0.0,0.0,130
4,-6.0,0.057544,0.074444,chi1ARG151,0.020438,1e-05,1.541895e-07,151


In [64]:
# For each concentration exponent in the scan, count how many rows of `df`
# have a directional / driven flux whose magnitude exceeds 1.
# With `calculation = False` the counts are read back from the cached pickle.
calculation = False
concentration_counts = pd.DataFrame()
if calculation:
    # One record per concentration; the frame is built once at the end instead
    # of being re-copied by `DataFrame.append` (removed in pandas >= 2.0).
    records = []
    for concentration in np.arange(-6, 0, 0.1):
        # Rows of `df` at this concentration. Both sides are rounded to one
        # decimal, presumably to absorb floating-point error in the grid.
        at_concentration = df[np.round(df['Concentration'], 1) == np.round(concentration, 1)]
        # Take magnitudes as new Series rather than assigning them back into
        # the filtered frame, which raised pandas' SettingWithCopyWarning.
        directional = at_concentration['Directional flux'].abs()
        driven = at_concentration['Driven flux'].abs()

        records.append({'Concentration': concentration,
                        'Directional flux > 1': int((directional > 1).sum()),
                        'Driven flux > 1': int((driven > 1).sum()),
                        })
    concentration_counts = pd.DataFrame(records)
    concentration_counts.to_pickle('pka-concentration-counts.pickle')
else:
    concentration_counts = pd.read_pickle('pka-concentration-counts.pickle')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [66]:
# Display the per-concentration counts.
concentration_counts

Unnamed: 0,Concentration,Directional flux > 1,Driven flux > 1
0,-6.0,0,0
1,-5.9,0,0
2,-5.8,0,1
3,-5.7,0,1
4,-5.6,0,0
5,-5.5,0,1
6,-5.4,0,0
7,-5.3,0,3
8,-5.2,0,15
9,-5.1,0,42


In [67]:
# Percentage of the torsions in `names` that are counted under
# 'Directional flux > 1' at a concentration exponent of -3.0.
percent = (
    100
    * concentration_counts.loc[
        np.round(concentration_counts['Concentration'], 1) == np.round(-3.0, 1),
        'Directional flux > 1',
    ].values[0]
    / len(names)
)

In [68]:
# Display the percentage computed from `concentration_counts`.
percent

8.7313432835820901

In [50]:
# NOTE(review): this loads the *counts* pickle into `df`, yet the cell that
# follows filters `df` on 'Directional flux' / 'Driven flux' and then rewrites
# 'adk-concentration-counts.pickle'. Presumably the ADK scan results were
# meant to be loaded here — TODO confirm the intended file name.
df = pd.read_pickle('adk-concentration-counts.pickle')

In [55]:
# For each concentration exponent in the scan, count how many rows of `df`
# have a directional / driven flux whose magnitude exceeds 1, and cache the
# result for ADK.
calculation = True
adk_concentration_counts = pd.DataFrame()
if calculation:
    # One record per concentration; the frame is built once at the end instead
    # of being re-copied by `DataFrame.append` (removed in pandas >= 2.0).
    records = []
    for concentration in np.arange(-6, 0, 0.1):
        # Rows of `df` at this concentration. Both sides are rounded to one
        # decimal, presumably to absorb floating-point error in the grid.
        at_concentration = df[np.round(df['Concentration'], 1) == np.round(concentration, 1)]
        # Take magnitudes as new Series rather than assigning them back into
        # the filtered frame, which raised pandas' SettingWithCopyWarning.
        directional = at_concentration['Directional flux'].abs()
        driven = at_concentration['Driven flux'].abs()

        records.append({'Concentration': concentration,
                        'Directional flux > 1': int((directional > 1).sum()),
                        'Driven flux > 1': int((driven > 1).sum()),
                        })
    adk_concentration_counts = pd.DataFrame(records)
    adk_concentration_counts.to_pickle('adk-concentration-counts.pickle')
else:
    adk_concentration_counts = pd.read_pickle('adk-concentration-counts.pickle')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [56]:
# Percentage counted under 'Directional flux > 1' at a concentration exponent
# of -3.0, relative to a fixed total of 903.
# NOTE(review): 903 is hard-coded; presumably the number of ADK torsions —
# TODO confirm.
adk_percent = (
    100
    * adk_concentration_counts.loc[
        np.round(adk_concentration_counts['Concentration'], 1) == np.round(-3.0, 1),
        'Directional flux > 1',
    ].values[0]
    / 903
)

In [57]:
# Display the percentage computed from `adk_concentration_counts`.
adk_percent

13.732004429678849

# HIV

In [3]:
# Build the list of torsion names for which apo HIV protease ('1hhp_apo')
# data files exist. This rebinds `names`, replacing any list built earlier.
hiv_dir = '../../md-data/hiv-protease/'
hiv_unbound_files = sorted(glob.glob(hiv_dir + '1hhp_apo/' + '*'))
names = []
for path in hiv_unbound_files:
    name = os.path.splitext(os.path.basename(path))[0]
    # Keep only the part of the file name that precedes the first underscore.
    name = re.search('^[^_]*', name).group(0)
    # Torsions left out of the scan. The patterns are regular expressions, so
    # the trailing `*` only makes the preceding character optional.
    if re.search('omega*', name):
        continue
    if re.search('chi3ASN*', name):
        continue
    if re.search('chi5LYS*', name):
        continue
    names.append(name)

In [71]:
# Scan the concentration exponent from -6 up to (but excluding) 0 in steps of
# 0.1 and, for every torsion in `names`, record the fluxes from
# `summarize_fluxes` together with the maximum power and load from
# `summarize_power_and_load`, using the 'hiv_md_data' source. The value passed
# on is 10**concentration; the exponent itself is stored in 'Concentration'.
calculation = True
df = pd.DataFrame()
if calculation:
    # Gather one record per (concentration, torsion) and build the frame once.
    # Appending to a DataFrame inside the loop copies the whole frame on every
    # iteration, and `DataFrame.append` no longer exists in pandas >= 2.0.
    records = []
    for concentration in np.arange(-6, 0, 0.1):
        # Progress indicator: one line per concentration step.
        print(concentration)
        for name in names:
            directional_flux, intersurface_flux, driven_flux = summarize_fluxes(
                name, concentration=10**concentration, data_source='hiv_md_data')
            # The power/load summary is told the sign of the directional flux.
            negative = bool(directional_flux < 0)
            max_power, max_load = summarize_power_and_load(
                name, concentration=10**concentration, negative=negative,
                debug=False, data_source='hiv_md_data')
            records.append({'Concentration': concentration,
                            'Directional flux': directional_flux,
                            'Intersurface flux': intersurface_flux,
                            'Driven flux': driven_flux,
                            'File': name,
                            'Max power': max_power,
                            'Max load': max_load,
                            # Trailing digits of the torsion name (kept as a string).
                            'ResID': re.match('.*?([0-9]+)$', name).group(1),
                            })
    df = pd.DataFrame(records)

    df.to_pickle('hiv-concentration-scan.pickle')
else:
    # Reuse the results cached by an earlier run.
    df = pd.read_pickle('hiv-concentration-scan.pickle')

-6.0


  ss = abs(eigenvectors[:, self.eigenvalues.argmax()].astype(float))


-5.9
-5.8
-5.7
-5.6
-5.5
-5.4
-5.3
-5.2
-5.1
-5.0
-4.9
-4.8
-4.7
-4.6
-4.5
-4.4
-4.3
-4.2
-4.1
-4.0
-3.9
-3.8
-3.7
-3.6
-3.5
-3.4
-3.3
-3.2
-3.1
-3.0
-2.9
-2.8
-2.7
-2.6
-2.5
-2.4
-2.3
-2.2
-2.1
-2.0
-1.9
-1.8
-1.7
-1.6
-1.5
-1.4
-1.3
-1.2
-1.1
-1.0
-0.9
-0.8
-0.7
-0.6
-0.5
-0.4
-0.3
-0.2
-0.1


In [None]:
# Same HIV concentration scan as the 'hiv-concentration-scan.pickle' run, but
# with catalytic_rate=200 passed to both `summarize_fluxes` and
# `summarize_power_and_load`; results go to a separate pickle.
calculation = True
df = pd.DataFrame()
if calculation:
    # Gather one record per (concentration, torsion) and build the frame once.
    # Appending to a DataFrame inside the loop copies the whole frame on every
    # iteration, and `DataFrame.append` no longer exists in pandas >= 2.0.
    records = []
    for concentration in np.arange(-6, 0, 0.1):
        # Progress indicator: one line per concentration step.
        print(concentration)
        for name in names:
            directional_flux, intersurface_flux, driven_flux = summarize_fluxes(
                name, concentration=10**concentration, data_source='hiv_md_data',
                catalytic_rate=200)
            # The power/load summary is told the sign of the directional flux.
            negative = bool(directional_flux < 0)
            max_power, max_load = summarize_power_and_load(
                name, concentration=10**concentration, negative=negative,
                debug=False, data_source='hiv_md_data',
                catalytic_rate=200)
            records.append({'Concentration': concentration,
                            'Directional flux': directional_flux,
                            'Intersurface flux': intersurface_flux,
                            'Driven flux': driven_flux,
                            'File': name,
                            'Max power': max_power,
                            'Max load': max_load,
                            # Trailing digits of the torsion name (kept as a string).
                            'ResID': re.match('.*?([0-9]+)$', name).group(1),
                            })
    df = pd.DataFrame(records)

    df.to_pickle('hiv-concentration-scan-catalytic-rate-200.pickle')
else:
    # Reuse the results cached by an earlier run.
    df = pd.read_pickle('hiv-concentration-scan-catalytic-rate-200.pickle')

-6.0


In [72]:
# Display the scan results held in `df`.
df

Unnamed: 0,Concentration,Directional flux,Driven flux,File,Intersurface flux,Max load,Max power,ResID
0,-6.0,0.000029,0.000529,chi1ALA121,0.000006,0.00000,0.000000e+00,121
1,-6.0,-0.000008,0.000304,chi1ALA127,0.000005,0.00000,0.000000e+00,127
2,-6.0,-0.000026,0.000384,chi1ALA170,0.000026,0.00000,0.000000e+00,170
3,-6.0,0.000002,0.000439,chi1ALA22,0.000007,0.00000,0.000000e+00,22
4,-6.0,0.000029,0.001284,chi1ALA28,0.000003,0.00000,0.000000e+00,28
5,-6.0,0.000007,0.000404,chi1ALA71,0.000002,0.00000,0.000000e+00,71
6,-6.0,0.000246,0.001099,chi1ARG107,0.000027,0.00000,0.000000e+00,107
7,-6.0,-0.000123,0.000658,chi1ARG113,0.000026,0.00000,0.000000e+00,113
8,-6.0,-0.000293,0.001087,chi1ARG14,0.000006,0.00000,0.000000e+00,14
9,-6.0,0.000018,0.000246,chi1ARG156,0.000011,0.00000,0.000000e+00,156


In [73]:
def return_concentration_slice(df, concentration):
    """Return the rows of `df` recorded at a given concentration.

    The 'Concentration' column and `concentration` are both rounded to one
    decimal place before they are compared, so values that agree to that
    precision are treated as equal.
    """
    target = np.round(concentration, 1)
    matches = df['Concentration'].round(1) == target
    return df.loc[matches]

In [74]:
# Rows of `df` at a concentration exponent of -3.0.
tmp = return_concentration_slice(df, -3.0)

In [78]:
# Number of rows in `tmp` whose directional flux exceeds 1 in magnitude.
(tmp['Directional flux'].abs() > 1).sum()

0

In [79]:
# Number of rows in `tmp` whose directional flux exceeds 0.1 in magnitude.
(tmp['Directional flux'].abs() > 0.1).sum()

2

In [85]:
# Show the five rows of `tmp` with the largest 'Max power'.
# `DataFrame.sort` was deprecated and later removed from pandas;
# `sort_values` is its replacement.
tmp.sort_values('Max power', ascending=False).head()

  if __name__ == '__main__':


Unnamed: 0,Concentration,Directional flux,Driven flux,File,Intersurface flux,Max load,Max power,ResID
24838,-3.0,-0.049167,0.195966,chi4ARG87,0.035266,-0.00051,2.3e-05,87
25206,-3.0,-0.33625,1.045363,psiLYS55,0.126553,-3e-05,1.1e-05,55
24835,-3.0,-0.015845,0.143058,chi4ARG186,0.038707,-0.00045,6e-06,186
24478,-3.0,0.054128,0.164541,chi1ASP30,0.030889,0.0001,6e-06,30
24952,-3.0,-0.0044,0.089572,phiHID69,0.036455,-0.00013,5e-06,69
