### Notebook to classify MD frames as active or inactive

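Now using a regression target (the sum of the key active-site distances) instead of classification; the classification labels are still computed and written out below.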



In [1]:
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Systems analysed; each name is interpolated into the input/output file paths.
PROTEINS = ["TEM1_1M40", "ENCA_3ZDJ", "GNCA_4B88", "PNCA_4C6Y"]

# Distance cutoffs for active state.
# A frame is labelled "Active" only when every checked distance is <= its cutoff.
# Presumably in Å, matching the axis labels used for these distances -- TODO confirm.
ACTIVE_MAX_WATER_GLU = 3
ACTIVE_MAX_WATER_SUB = 3.5
# NOTE(review): currently unused -- the Ser130 check is disabled in the classification cell.
ACTIVE_MAX_SER130_SUB = 3.5
ACTIVE_MAX_LYS73_SUB = 3.5
ACTIVE_MAX_ASN132_LYS73 = 3.5
ACTIVE_MAX_A237_SUB = 3.5
ACTIVE_MAX_SER70_SUB = 3.5 

# Distance cutoffs for inactive state.
# NOTE(review): none of the INACTIVE_MIN_* values are referenced by the live cells of this
# notebook; frames that are not "Active" get a fallback label without any inactive test.
INACTIVE_MIN_WATER_GLU = 4
INACTIVE_MIN_WATER_SUB = 4.5
INACTIVE_MIN_SER130_SUB = 4.5
INACTIVE_MIN_LYS73_SUB = 4.5
INACTIVE_MIN_ASN132_LYS73 = 4.5
INACTIVE_MIN_A237_SUB = 4.5
INACTIVE_MIN_SER70_SUB = 4.5


In [2]:
# The four per-frame water-Glu distance columns; only their minimum is used downstream.
WAT_GLU_COLUMNS = ["wat_glu_1", "wat_glu_2", "wat_glu_3", "wat_glu_4"]


def _load_distance_table(protein, substrate):
    """Read one per-frame distance table and add the ``min_wat_glu`` column.

    Parameters
    ----------
    protein : str
        System name used in the file name (an entry of ``PROTEINS``).
    substrate : str
        Substrate tag used in the file name ("Benzyl_Pen" or "Cefo").

    Returns
    -------
    pandas.DataFrame
        The whitespace-delimited table read from
        ``target_variable/{protein}_{substrate}_dists.dat`` with an extra
        ``min_wat_glu`` column holding the row-wise minimum of the four
        ``wat_glu_*`` columns.

    Raises
    ------
    FileNotFoundError
        If the distance file does not exist.
    KeyError
        If any of the ``wat_glu_*`` columns is missing from the table.
    """
    results_file = rf"target_variable/{protein}_{substrate}_dists.dat"
    df = pd.read_csv(results_file, sep=r"\s+")
    df["min_wat_glu"] = df[WAT_GLU_COLUMNS].min(axis=1)
    return df


# One table per protein and substrate, keyed by protein name.
benzyl_dfs = {protein: _load_distance_table(protein, "Benzyl_Pen") for protein in PROTEINS}
cefo_dfs = {protein: _load_distance_table(protein, "Cefo") for protein in PROTEINS}


In [3]:
# Preview one loaded Cefo table to sanity-check the parsed columns and the derived min_wat_glu.
cefo_dfs["ENCA_3ZDJ"]

Unnamed: 0,#Frame,wat_glu_1,wat_glu_2,wat_glu_3,wat_glu_4,wat_sub,ser130_sub,lys73_sub,asn132_lys73,a237_sub,ser70_sub,min_wat_glu
0,1,2.3260,3.1742,1.9477,3.2917,2.9050,4.3015,2.7166,2.7547,2.9073,2.7744,1.9477
1,2,2.3488,3.0507,1.8562,3.3071,3.4551,4.6864,3.8380,3.1789,2.8092,2.8148,1.8562
2,3,2.4506,3.0016,1.6966,3.1437,3.1154,4.0017,3.5665,2.8184,2.9172,2.8453,1.6966
3,4,2.7365,2.9594,1.7820,3.0310,3.7577,4.0135,3.5605,2.8441,2.9365,2.9132,1.7820
4,5,2.4636,2.8646,1.4917,2.8856,3.0168,4.6535,2.6222,2.8064,2.9142,2.7869,1.4917
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,3.6073,2.1082,3.2846,2.1614,3.4202,7.0998,3.4064,2.9844,2.9287,2.8977,2.1082
9996,9997,3.8190,2.3629,3.1349,1.8871,3.5768,6.6278,3.3070,2.7015,2.8924,2.8196,1.8871
9997,9998,3.2081,1.7775,3.3864,2.5468,3.2968,6.4407,3.1357,2.7100,2.8309,2.6965,1.7775
9998,9999,3.3192,1.8391,3.2114,2.3631,3.0356,6.3139,3.1580,2.8139,2.8044,2.8435,1.8391


In [4]:
def add_activity_targets(df, inactive_label, round_decimals=None):
    """Add the ``regressions`` and ``classifications`` columns to ``df`` in place.

    ``regressions`` is the per-frame sum of the six monitored distances and
    ``classifications`` is "Active" when every one of those distances is at or
    below its ``ACTIVE_MAX_*`` cutoff, otherwise ``inactive_label``.

    This replaces two copy-pasted ``iterrows()`` loops with column-wise
    operations; the values produced are the same as the per-row version.

    Parameters
    ----------
    df : pandas.DataFrame
        Distance table containing ``wat_sub``, ``min_wat_glu``, ``lys73_sub``,
        ``asn132_lys73``, ``a237_sub`` and ``ser70_sub``. Modified in place.
    inactive_label : str
        Label written for frames that fail at least one cutoff.
    round_decimals : int or None, optional
        If given, the summed distance is rounded to this many decimals.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If any of the required distance columns is missing.
    """
    # (column, active-state cutoff) pairs, in the order the distances are summed.
    # ser130_sub is deliberately excluded from both the sum and the check,
    # matching the previous implementation where it was commented out.
    active_cutoffs = (
        ("wat_sub", ACTIVE_MAX_WATER_SUB),
        ("min_wat_glu", ACTIVE_MAX_WATER_GLU),
        ("lys73_sub", ACTIVE_MAX_LYS73_SUB),
        ("asn132_lys73", ACTIVE_MAX_ASN132_LYS73),
        ("a237_sub", ACTIVE_MAX_A237_SUB),
        ("ser70_sub", ACTIVE_MAX_SER70_SUB),
    )

    # Built-in sum adds the columns left to right, the same accumulation order
    # as the per-row loop, so the floating-point results are unchanged.
    total_dist = sum(df[column] for column, _ in active_cutoffs).to_numpy()
    if round_decimals is not None:
        total_dist = np.round(total_dist, round_decimals)

    # A frame is active only if every distance passes; NaN compares as False.
    frame_active = np.logical_and.reduce(
        [(df[column] <= cutoff).to_numpy() for column, cutoff in active_cutoffs]
    )

    df["regressions"] = total_dist
    df["classifications"] = np.where(frame_active, "Active", inactive_label)


# NOTE(review): the two substrates were labelled by diverging copies of the same
# loop. Benzyl_Pen frames get "Neither" and unrounded sums; Cefo frames get
# "Inactive" and sums rounded to 2 decimals. Both differences are kept so the
# written target files stay the same -- confirm whether they are intentional.
for df in benzyl_dfs.values():
    add_activity_targets(df, inactive_label="Neither")

for df in cefo_dfs.values():
    add_activity_targets(df, inactive_label="Inactive", round_decimals=2)

In [5]:
# Preview one Benzyl_Pen table to check the newly added regressions/classifications columns.
benzyl_dfs["ENCA_3ZDJ"]

Unnamed: 0,#Frame,wat_glu_1,wat_glu_2,wat_glu_3,wat_glu_4,wat_sub,ser130_sub,lys73_sub,asn132_lys73,a237_sub,ser70_sub,min_wat_glu,regressions,classifications
0,1,2.3186,3.2471,1.8352,3.3169,2.9170,4.7573,2.9094,2.7981,2.8331,2.8877,1.8352,16.1805,Active
1,2,2.7138,3.0906,1.7177,3.0001,2.8765,4.4280,2.9084,2.6725,2.9580,2.8165,1.7177,15.9496,Active
2,3,2.0404,2.9969,1.9023,3.4138,2.9888,4.6321,3.1960,2.8994,2.7629,2.8851,1.9023,16.6345,Active
3,4,2.5737,3.1767,1.9001,3.3074,3.1046,4.3247,2.9852,2.7444,3.0001,2.7371,1.9001,16.4715,Active
4,5,2.2580,2.9922,1.8651,3.3419,2.8085,4.3374,2.9199,2.6785,3.1597,3.1181,1.8651,16.5498,Active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,2.1064,2.8730,2.3806,3.8513,3.4890,6.2498,5.0693,4.6859,3.0401,2.7401,2.1064,21.1308,Neither
9996,9997,2.5569,3.5904,1.9751,3.4860,3.5757,6.0195,4.9267,4.2840,2.8583,2.8625,1.9751,20.4823,Neither
9997,9998,2.0068,2.9231,2.2751,3.7660,3.5445,6.0537,5.2582,5.0962,2.7469,2.9033,2.0068,21.5559,Neither
9998,9999,1.7410,3.1154,3.0745,4.4796,3.7428,5.8890,5.1752,5.9080,2.9892,2.8609,1.7410,22.4171,Neither


In [6]:
# Class balance of the Benzyl_Pen labels, one summary per protein.
for protein, df in benzyl_dfs.items():
    label_counts = df["classifications"].value_counts()
    # One print call; sep="\n" reproduces the original three-line layout.
    print(f"{protein=}", label_counts, "\n", sep="\n")

protein='TEM1_1M40'
classifications
Neither    5164
Active     4836
Name: count, dtype: int64


protein='ENCA_3ZDJ'
classifications
Neither    7489
Active     2511
Name: count, dtype: int64


protein='GNCA_4B88'
classifications
Neither    7223
Active     2777
Name: count, dtype: int64


protein='PNCA_4C6Y'
classifications
Neither    6647
Active     3353
Name: count, dtype: int64




In [7]:
# Class balance of the Cefo labels, one summary per protein.
for protein, df in cefo_dfs.items():
    label_counts = df["classifications"].value_counts()
    # One print call; sep="\n" reproduces the original three-line layout.
    print(f"{protein=}", label_counts, "\n", sep="\n")

protein='TEM1_1M40'
classifications
Active      5831
Inactive    4169
Name: count, dtype: int64


protein='ENCA_3ZDJ'
classifications
Inactive    6497
Active      3503
Name: count, dtype: int64


protein='GNCA_4B88'
classifications
Inactive    6351
Active      3649
Name: count, dtype: int64


protein='PNCA_4C6Y'
classifications
Inactive    6582
Active      3418
Name: count, dtype: int64




In [8]:
# Write the per-frame class labels to plain-text files, one label per line.
for protein, df in benzyl_dfs.items():
    out_file_path = rf"target_variable/{protein}_Benzyl_Pen_Classified.txt"
    with open(out_file_path, "w") as file:
        file.writelines(f"{classification}\n" for classification in df["classifications"])

for protein, df in cefo_dfs.items():
    out_file_path = rf"target_variable/{protein}_Cefo_Classified.txt"
    with open(out_file_path, "w") as file:
        file.writelines(f"{classification}\n" for classification in df["classifications"])

In [9]:
# Write the per-frame regression targets to plain-text files, one value per line.
for protein, df in benzyl_dfs.items():
    out_file_path = rf"target_variable/{protein}_Benzyl_Pen_Regress.txt"
    with open(out_file_path, "w") as file:
        file.writelines(f"{regression}\n" for regression in df["regressions"])

for protein, df in cefo_dfs.items():
    out_file_path = rf"target_variable/{protein}_Cefo_Regress.txt"
    with open(out_file_path, "w") as file:
        file.writelines(f"{regression}\n" for regression in df["regressions"])

In [10]:
# for protein, df in protein_dfs.items():

#     # classifications file
#     classifications = df["classifications"]
#     out_file_path = rf"target_variable/{protein}_Benzyl_Pen_classified.txt"
#     with open(out_file_path, "w") as file:
#         for classification in classifications:
#             file.write(f"{classification}\n")

#     # regression based on water distance
#     regressions = df["wat_sub"]
#     out_file_path = rf"target_variable/{protein}_Benzyl_Pen_regress.txt"
#     with open(out_file_path, "w") as file:
#         for value in regressions:
#             file.write(f"{value}\n")

In [11]:
# for protein in PROTEINS:
#     print(protein)
#     print(protein_dfs[protein]["classifications"].value_counts())
#     print("\n")

### Benzyl Pen Figures

In [12]:
# fig = make_subplots(rows=2, cols=2,
#                     subplot_titles=PROTEINS)

# current_idx = [1, 1]
# for protein, df in benzyl_dfs.items():
#     fig.add_trace(go.Scatter(
#         x = df["min_wat_glu"], 
#         y = df["wat_sub"],
#         mode="markers",
#         marker=dict(
#             opacity=0.1,
#             ),
#     ),
#     row=current_idx[0], col=current_idx[1])
    
#     if current_idx == [1, 1]:
#         current_idx = [1, 2]
#     elif current_idx == [1, 2]:
#         current_idx = [2, 1]
#     elif current_idx == [2, 1]:
#         current_idx = [2, 2]


# # Update x and y-axis ranges for each subplot
# for i in range(1, 3):
#     for j in range(1, 3):
#         fig.update_xaxes(title_text="Deprotonation Distance (Å)", row=i, col=j)
#         fig.update_yaxes(title_text="Water Attack Distance (Å)", row=i, col=j)
#         fig.update_yaxes(range = [2,8])
#         fig.update_xaxes(range = [1,6])

# fig.update_layout(showlegend=False,
#     height=700, width=1000,
#     )
# fig.show()
# pio.write_image(fig, r"pics/benzyl_fig1.png", scale=6)

In [13]:
# fig = make_subplots(rows=2, cols=2,
#                     subplot_titles=PROTEINS)

# current_idx = [1, 1]
# for protein, df in benzyl_dfs.items():
#     fig.add_trace(go.Scatter(
#         x = df["lys73_sub"], 
#         y = df["asn132_lys73"],
#         mode="markers",
#         marker=dict(
#             opacity=0.1,
#             ),
#     ),
#     row=current_idx[0], col=current_idx[1])
    
#     if current_idx == [1, 1]:
#         current_idx = [1, 2]
#     elif current_idx == [1, 2]:
#         current_idx = [2, 1]
#     elif current_idx == [2, 1]:
#         current_idx = [2, 2]


# # Update x and y-axis ranges for each subplot
# for i in range(1, 3):
#     for j in range(1, 3):
#         fig.update_xaxes(title_text="Lys73 to Tetrahedral Intermediate (Å)", row=i, col=j)
#         fig.update_yaxes(title_text="Asn132 to Lys73 (Å)", row=i, col=j)
#         fig.update_yaxes(range = [2,8])
#         fig.update_xaxes(range = [2.5,6.5])

# fig.update_layout(showlegend=False,
#     height=700, width=1000,
#     )
# fig.show()
# pio.write_image(fig, r"pics/benzyl_fig2.png", scale=6)

In [14]:
# fig = make_subplots(rows=2, cols=2,
#                     subplot_titles=PROTEINS)

# current_idx = [1, 1]
# for protein, df in benzyl_dfs.items():
#     fig.add_trace(go.Scatter(
#         x = df["min_wat_glu"], 
#         y = df["ser130_sub"],
#         mode="markers",
#         marker=dict(
#             opacity=0.1,
#             ),
#     ),
#     row=current_idx[0], col=current_idx[1])
    
#     if current_idx == [1, 1]:
#         current_idx = [1, 2]
#     elif current_idx == [1, 2]:
#         current_idx = [2, 1]
#     elif current_idx == [2, 1]:
#         current_idx = [2, 2]


# # Update x and y-axis ranges for each subplot
# for i in range(1, 3):
#     for j in range(1, 3):
#         fig.update_xaxes(title_text="Deprotonation Distance (Å)", row=i, col=j)
#         fig.update_yaxes(title_text="Ser130 to Tetrahedral Intermediate (Å)", row=i, col=j)
#         fig.update_yaxes(range = [2,8])
#         fig.update_xaxes(range = [1,5.5])

# fig.update_layout(showlegend=False,
#     height=700, width=1000,
#     )
# fig.show()
# pio.write_image(fig, r"pics/benzyl_fig3.png", scale=6)

In [15]:
# # new fig:
# fig = make_subplots(rows=2, cols=2,
#                     subplot_titles=PROTEINS)

# current_idx = [1, 1]
# for protein, df in benzyl_dfs.items():
#     fig.add_trace(go.Scatter(
#         x = df["a237_sub"], 
#         y = df["ser70_sub"],
#         mode="markers",
#         marker=dict(
#             opacity=0.1,
#             ),
#     ),
#     row=current_idx[0], col=current_idx[1])
    
#     if current_idx == [1, 1]:
#         current_idx = [1, 2]
#     elif current_idx == [1, 2]:
#         current_idx = [2, 1]
#     elif current_idx == [2, 1]:
#         current_idx = [2, 2]


# # Update x and y-axis ranges for each subplot
# for i in range(1, 3):
#     for j in range(1, 3):
#         fig.update_xaxes(title_text="A237 to Carbonyl Oxygen (Å)", row=i, col=j)
#         fig.update_yaxes(title_text="S70 to Carbonyl Oxygen (Å)", row=i, col=j)
#         fig.update_yaxes(range = [2,5])
#         fig.update_xaxes(range = [2,5])

# fig.update_layout(showlegend=False,
#     height=700, width=1000,
#     )
# fig.show()
# pio.write_image(fig, r"pics/benzyl_fig4.png", scale=6)

### Cefo Figures

In [16]:
# fig = make_subplots(rows=2, cols=2,
#                     subplot_titles=PROTEINS)

# current_idx = [1, 1]
# for protein, df in cefo_dfs.items():
#     fig.add_trace(go.Scatter(
#         x = df["min_wat_glu"], 
#         y = df["wat_sub"],
#         mode="markers",
#         marker=dict(
#             opacity=0.1,
#             ),
#     ),
#     row=current_idx[0], col=current_idx[1])
    
#     if current_idx == [1, 1]:
#         current_idx = [1, 2]
#     elif current_idx == [1, 2]:
#         current_idx = [2, 1]
#     elif current_idx == [2, 1]:
#         current_idx = [2, 2]


# # Update x and y-axis ranges for each subplot
# for i in range(1, 3):
#     for j in range(1, 3):
#         fig.update_xaxes(title_text="Deprotonation Distance (Å)", row=i, col=j)
#         fig.update_yaxes(title_text="Water Attack Distance (Å)", row=i, col=j)
#         fig.update_yaxes(range = [2,8])
#         fig.update_xaxes(range = [1,6])

# fig.update_layout(showlegend=False,
#     height=700, width=1000,
#     )
# fig.show()
# pio.write_image(fig, r"pics/cefo_fig1.png", scale=6)

In [17]:
# fig = make_subplots(rows=2, cols=2,
#                     subplot_titles=PROTEINS)

# current_idx = [1, 1]
# for protein, df in cefo_dfs.items():
#     fig.add_trace(go.Scatter(
#         x = df["lys73_sub"], 
#         y = df["asn132_lys73"],
#         mode="markers",
#         marker=dict(
#             opacity=0.1,
#             ),
#     ),
#     row=current_idx[0], col=current_idx[1])
    
#     if current_idx == [1, 1]:
#         current_idx = [1, 2]
#     elif current_idx == [1, 2]:
#         current_idx = [2, 1]
#     elif current_idx == [2, 1]:
#         current_idx = [2, 2]


# # Update x and y-axis ranges for each subplot
# for i in range(1, 3):
#     for j in range(1, 3):
#         fig.update_xaxes(title_text="Lys73 to Tetrahedral Intermediate (Å)", row=i, col=j)
#         fig.update_yaxes(title_text="Asn132 to Lys73 (Å)", row=i, col=j)
#         fig.update_yaxes(range = [2,8])
#         fig.update_xaxes(range = [2.5,6.5])

# fig.update_layout(showlegend=False,
#     height=700, width=1000,
#     )
# fig.show()
# pio.write_image(fig, r"pics/cefo_fig2.png", scale=6)

In [18]:
# fig = make_subplots(rows=2, cols=2,
#                     subplot_titles=PROTEINS)

# current_idx = [1, 1]
# for protein, df in cefo_dfs.items():
#     fig.add_trace(go.Scatter(
#         x = df["min_wat_glu"], 
#         y = df["ser130_sub"],
#         mode="markers",
#         marker=dict(
#             opacity=0.1,
#             ),
#     ),
#     row=current_idx[0], col=current_idx[1])
    
#     if current_idx == [1, 1]:
#         current_idx = [1, 2]
#     elif current_idx == [1, 2]:
#         current_idx = [2, 1]
#     elif current_idx == [2, 1]:
#         current_idx = [2, 2]


# # Update x and y-axis ranges for each subplot
# for i in range(1, 3):
#     for j in range(1, 3):
#         fig.update_xaxes(title_text="Deprotonation Distance (Å)", row=i, col=j)
#         fig.update_yaxes(title_text="Ser130 to Tetrahedral Intermediate (Å)", row=i, col=j)
#         fig.update_xaxes(range = [1,5.5])
#         fig.update_yaxes(range = [2,8])

# fig.update_layout(showlegend=False,
#     height=700, width=1000,
#     )
# fig.show()
# pio.write_image(fig, r"pics/cefo_fig3.png", scale=6)

In [19]:
# # new fig
# fig = make_subplots(rows=2, cols=2,
#                     subplot_titles=PROTEINS)

# current_idx = [1, 1]
# for protein, df in cefo_dfs.items():
#     fig.add_trace(go.Scatter(
#         x = df["a237_sub"], 
#         y = df["ser70_sub"],
#         mode="markers",
#         marker=dict(
#             opacity=0.1,
#             ),
#     ),
#     row=current_idx[0], col=current_idx[1])
    
#     if current_idx == [1, 1]:
#         current_idx = [1, 2]
#     elif current_idx == [1, 2]:
#         current_idx = [2, 1]
#     elif current_idx == [2, 1]:
#         current_idx = [2, 2]


# # Update x and y-axis ranges for each subplot
# for i in range(1, 3):
#     for j in range(1, 3):
#         fig.update_xaxes(title_text="A237 to Carbonyl Oxygen (Å)", row=i, col=j)
#         fig.update_yaxes(title_text="S70 to Carbonyl Oxygen (Å)", row=i, col=j)
#         fig.update_yaxes(range = [2,5])
#         fig.update_xaxes(range = [2,5])

# fig.update_layout(showlegend=False,
#     height=700, width=1000,
#     )
# fig.show()
# pio.write_image(fig, r"pics/cefo_fig4.png", scale=6)

### More Figures

One figure per system + substrate: violin plots of each distance, plus a violin plot of the sum of the distances?

In [20]:
# # benzyl, TEM1_1M40
# df = benzyl_dfs["TEM1_1M40"]
# fig = go.Figure()

# fig.add_trace(
#     go.Violin(
#               y=df["min_wat_glu"],
#               line_color="mediumpurple",
#               legendgroup="Deprotonation Distance (Å)", name="Deprotonation Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["wat_sub"],
#               line_color="#1167b1",
#               legendgroup="Water Attack Distance (Å)", name="Water Attack Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["lys73_sub"],
#               line_color="lightseagreen",
#               legendgroup="Lys73 to TI (Å)", name="Lys73 to TI (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["asn132_lys73"],
#               line_color="red",
#               legendgroup="Asn132 to Lys73 (Å)", name="Asn132 to Lys73 (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser130_sub"],
#               line_color="grey",
#               legendgroup="Ser130 to TI (Å)", name="Ser130 to TI (Å)",
#               ),
# )

# # new traces 
# fig.add_trace(
#     go.Violin(
#               y=df["a237_sub"],
#               line_color="darkgreen",
#               legendgroup="A237 to Carbonyl O (Å)", name="A237 to Carbonyl O (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser70_sub"],
#               line_color="orange",
#               legendgroup="S70 to Carbonyl Oxygen (Å)", name="S70 to Carbonyl O (Å)",
#               ),
# )


# #update characteristics shared by all traces
# fig.update_traces(meanline_visible=True,
#                 #   points="all", # show all points
#                 #   jitter=0.40,  # add some jitter on points for better visibility
# )
# fig.update_layout(
#     template="plotly_white",
#     yaxis= dict(title="Distance (Å)", titlefont=dict(size=32)),
#     margin=dict(l=20, r=20, t=20, b=20),
#     showlegend=False,
#     font_family="Arial",
#     width=1000,
#     height=600,
#     yaxis_range=[1,10]

# )

# fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=16))
# fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=22))
# fig.show("svg") # remove "svg" to make the figure interactive.

# pio.write_image(fig, r"pics/benzyl_tem1.png", scale=6)

In [21]:
# # benzyl, ENCA_3ZDJ
# df = benzyl_dfs["ENCA_3ZDJ"]
# fig = go.Figure()

# fig.add_trace(
#     go.Violin(
#               y=df["min_wat_glu"],
#               line_color="mediumpurple",
#               legendgroup="Deprotonation Distance (Å)", name="Deprotonation Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["wat_sub"],
#               line_color="#1167b1",
#               legendgroup="Water Attack Distance (Å)", name="Water Attack Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["lys73_sub"],
#               line_color="lightseagreen",
#               legendgroup="Lys73 to TI (Å)", name="Lys73 to TI (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["asn132_lys73"],
#               line_color="red",
#               legendgroup="Asn132 to Lys73 (Å)", name="Asn132 to Lys73 (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser130_sub"],
#               line_color="grey",
#               legendgroup="Ser130 to TI (Å)", name="Ser130 to TI (Å)",
#               ),
# )

# # new traces 
# fig.add_trace(
#     go.Violin(
#               y=df["a237_sub"],
#               line_color="darkgreen",
#               legendgroup="A237 to Carbonyl O (Å)", name="A237 to Carbonyl O (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser70_sub"],
#               line_color="orange",
#               legendgroup="S70 to Carbonyl Oxygen (Å)", name="S70 to Carbonyl O (Å)",
#               ),
# )


# #update characteristics shared by all traces
# fig.update_traces(meanline_visible=True,
#                 #   points="all", # show all points
#                 #   jitter=0.40,  # add some jitter on points for better visibility
# )
# fig.update_layout(
#     template="plotly_white",
#     yaxis= dict(title="Distance (Å)", titlefont=dict(size=32)),
#     margin=dict(l=20, r=20, t=20, b=20),
#     showlegend=False,
#     font_family="Arial",
#     width=1000,
#     height=600,
#     yaxis_range=[1,10]

# )

# fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=16))
# fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=22))
# fig.show("svg") # remove "svg" to make the figure interactive.

# pio.write_image(fig, r"pics/benzyl_enca.png", scale=6)

In [22]:
# # benzyl, GNCA
# df = benzyl_dfs["GNCA_4B88"]
# fig = go.Figure()

# fig.add_trace(
#     go.Violin(
#               y=df["min_wat_glu"],
#               line_color="mediumpurple",
#               legendgroup="Deprotonation Distance (Å)", name="Deprotonation Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["wat_sub"],
#               line_color="#1167b1",
#               legendgroup="Water Attack Distance (Å)", name="Water Attack Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["lys73_sub"],
#               line_color="lightseagreen",
#               legendgroup="Lys73 to TI (Å)", name="Lys73 to TI (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["asn132_lys73"],
#               line_color="red",
#               legendgroup="Asn132 to Lys73 (Å)", name="Asn132 to Lys73 (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser130_sub"],
#               line_color="grey",
#               legendgroup="Ser130 to TI (Å)", name="Ser130 to TI (Å)",
#               ),
# )

# # new traces 
# fig.add_trace(
#     go.Violin(
#               y=df["a237_sub"],
#               line_color="darkgreen",
#               legendgroup="A237 to Carbonyl O (Å)", name="A237 to Carbonyl O (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser70_sub"],
#               line_color="orange",
#               legendgroup="S70 to Carbonyl Oxygen (Å)", name="S70 to Carbonyl O (Å)",
#               ),
# )

# #update characteristics shared by all traces
# fig.update_traces(meanline_visible=True,
#                 #   points="all", # show all points
#                 #   jitter=0.40,  # add some jitter on points for better visibility
# )
# fig.update_layout(
#     template="plotly_white",
#     yaxis= dict(title="Distance (Å)", titlefont=dict(size=32)),
#     margin=dict(l=20, r=20, t=20, b=20),
#     showlegend=False,
#     font_family="Arial",
#     width=1000,
#     height=600,
#     yaxis_range=[1,10]

# )

# fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=16))
# fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=22))
# fig.show("svg") # remove "svg" to make the figure interactive.

# pio.write_image(fig, r"pics/benzyl_GNCA.png", scale=6)

In [23]:
# # benzyl, PNCA_4C6Y
# df = benzyl_dfs["PNCA_4C6Y"]
# fig = go.Figure()

# fig.add_trace(
#     go.Violin(
#               y=df["min_wat_glu"],
#               line_color="mediumpurple",
#               legendgroup="Deprotonation Distance (Å)", name="Deprotonation Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["wat_sub"],
#               line_color="#1167b1",
#               legendgroup="Water Attack Distance (Å)", name="Water Attack Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["lys73_sub"],
#               line_color="lightseagreen",
#               legendgroup="Lys73 to TI (Å)", name="Lys73 to TI (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["asn132_lys73"],
#               line_color="red",
#               legendgroup="Asn132 to Lys73 (Å)", name="Asn132 to Lys73 (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser130_sub"],
#               line_color="grey",
#               legendgroup="Ser130 to TI (Å)", name="Ser130 to TI (Å)",
#               ),
# )

# # new traces 
# fig.add_trace(
#     go.Violin(
#               y=df["a237_sub"],
#               line_color="darkgreen",
#               legendgroup="A237 to Carbonyl O (Å)", name="A237 to Carbonyl O (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser70_sub"],
#               line_color="orange",
#               legendgroup="S70 to Carbonyl Oxygen (Å)", name="S70 to Carbonyl O (Å)",
#               ),
# )



# #update characteristics shared by all traces
# fig.update_traces(meanline_visible=True,
#                 #   points="all", # show all points
#                 #   jitter=0.40,  # add some jitter on points for better visibility
# )
# fig.update_layout(
#     template="plotly_white",
#     yaxis= dict(title="Distance (Å)", titlefont=dict(size=32)),
#     margin=dict(l=20, r=20, t=20, b=20),
#     showlegend=False,
#     font_family="Arial",
#     width=1000,
#     height=600,
#     yaxis_range=[1,10]

# )

# fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=16))
# fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=22))
# fig.show("svg") # remove "svg" to make the figure interactive.

# pio.write_image(fig, r"pics/benzyl_pnca.png", scale=6)

In [24]:
# # cefo, TEM1_1M40
# df = cefo_dfs["TEM1_1M40"]
# fig = go.Figure()

# fig.add_trace(
#     go.Violin(
#               y=df["min_wat_glu"],
#               line_color="mediumpurple",
#               legendgroup="Deprotonation Distance (Å)", name="Deprotonation Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["wat_sub"],
#               line_color="#1167b1",
#               legendgroup="Water Attack Distance (Å)", name="Water Attack Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["lys73_sub"],
#               line_color="lightseagreen",
#               legendgroup="Lys73 to TI (Å)", name="Lys73 to TI (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["asn132_lys73"],
#               line_color="red",
#               legendgroup="Asn132 to Lys73 (Å)", name="Asn132 to Lys73 (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser130_sub"],
#               line_color="grey",
#               legendgroup="Ser130 to TI (Å)", name="Ser130 to TI (Å)",
#               ),
# )

# # new traces 
# fig.add_trace(
#     go.Violin(
#               y=df["a237_sub"],
#               line_color="darkgreen",
#               legendgroup="A237 to Carbonyl O (Å)", name="A237 to Carbonyl O (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser70_sub"],
#               line_color="orange",
#               legendgroup="S70 to Carbonyl Oxygen (Å)", name="S70 to Carbonyl O (Å)",
#               ),
# )

# #update characteristics shared by all traces
# fig.update_traces(meanline_visible=True,
#                 #   points="all", # show all points
#                 #   jitter=0.40,  # add some jitter on points for better visibility
# )
# fig.update_layout(
#     template="plotly_white",
#     yaxis= dict(title="Distance (Å)", titlefont=dict(size=32)),
#     margin=dict(l=20, r=20, t=20, b=20),
#     showlegend=False,
#     font_family="Arial",
#     width=1000,
#     height=600,
#     yaxis_range=[1,10]

# )

# fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=16))
# fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=22))
# fig.show("svg") # remove "svg" to make the figure interactive.

# pio.write_image(fig, r"pics/cefo_tem1.png", scale=6)

In [25]:
# # cefo,enca
#
# NOTE: every line of this cell is commented out, so executing the cell does nothing.
# If the leading "# " markers were removed, the cell would:
#   1. take the cefotaxime distance table for ENCA_3ZDJ from `cefo_dfs`,
#   2. add one violin trace per distance column, in this order:
#      min_wat_glu, wat_sub, lys73_sub, asn132_lys73, ser130_sub, a237_sub, ser70_sub,
#   3. apply a shared layout (plotly_white template, 1000x600, legend hidden,
#      y axis fixed to the range 1-10 and titled "Distance (Å)"),
#   4. render the figure as SVG and write it to pics/cefo_enca.png at scale=6.
#
# NOTE(review): the neighbouring cells repeat this layout verbatim for the other
# proteins, changing only the `cefo_dfs` key and the output filename. If these plots
# are revived, a single helper taking (df, out_path) would remove the duplication.
# NOTE(review): `titlefont=` inside `yaxis=dict(...)` is a deprecated spelling in newer
# plotly releases; `title=dict(text=..., font=dict(size=32))` is the current form —
# confirm against the installed plotly version before re-enabling.
# NOTE(review): the ser70_sub trace uses legendgroup "S70 to Carbonyl Oxygen (Å)" but
# name "S70 to Carbonyl O (Å)"; this has no visible effect while showlegend=False.
# NOTE(review): write_image presumably needs the pics/ directory to exist and a
# static-image backend (e.g. kaleido) to be installed — verify.
# df = cefo_dfs["ENCA_3ZDJ"]
# fig = go.Figure()

# fig.add_trace(
#     go.Violin(
#               y=df["min_wat_glu"],
#               line_color="mediumpurple",
#               legendgroup="Deprotonation Distance (Å)", name="Deprotonation Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["wat_sub"],
#               line_color="#1167b1",
#               legendgroup="Water Attack Distance (Å)", name="Water Attack Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["lys73_sub"],
#               line_color="lightseagreen",
#               legendgroup="Lys73 to TI (Å)", name="Lys73 to TI (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["asn132_lys73"],
#               line_color="red",
#               legendgroup="Asn132 to Lys73 (Å)", name="Asn132 to Lys73 (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser130_sub"],
#               line_color="grey",
#               legendgroup="Ser130 to TI (Å)", name="Ser130 to TI (Å)",
#               ),
# )

# # new traces 
# fig.add_trace(
#     go.Violin(
#               y=df["a237_sub"],
#               line_color="darkgreen",
#               legendgroup="A237 to Carbonyl O (Å)", name="A237 to Carbonyl O (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser70_sub"],
#               line_color="orange",
#               legendgroup="S70 to Carbonyl Oxygen (Å)", name="S70 to Carbonyl O (Å)",
#               ),
# )


# # Update characteristics shared by all traces (mean line drawn inside each violin).
# fig.update_traces(meanline_visible=True,
#                 #   points="all", # show all points
#                 #   jitter=0.40,  # add some jitter on points for better visibility
# )
# fig.update_layout(
#     template="plotly_white",
#     yaxis= dict(title="Distance (Å)", titlefont=dict(size=32)),
#     margin=dict(l=20, r=20, t=20, b=20),
#     showlegend=False,
#     font_family="Arial",
#     width=1000,
#     height=600,
#     yaxis_range=[1,10]

# )

# # Draw a mirrored black frame with outside ticks on both axes.
# fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=16))
# fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=22))
# fig.show("svg") # remove "svg" to make the figure interactive.

# pio.write_image(fig, r"pics/cefo_enca.png", scale=6)

In [26]:
# # cefo, pnca
#
# NOTE: every line of this cell is commented out, so executing the cell does nothing.
# If the leading "# " markers were removed, the cell would:
#   1. take the cefotaxime distance table for PNCA_4C6Y from `cefo_dfs`,
#   2. add one violin trace per distance column, in this order:
#      min_wat_glu, wat_sub, lys73_sub, asn132_lys73, ser130_sub, a237_sub, ser70_sub,
#   3. apply a shared layout (plotly_white template, 1000x600, legend hidden,
#      y axis fixed to the range 1-10 and titled "Distance (Å)"),
#   4. render the figure as SVG and write it to pics/cefo_pnca.png at scale=6.
#
# NOTE(review): the neighbouring cells repeat this layout verbatim for the other
# proteins, changing only the `cefo_dfs` key and the output filename. If these plots
# are revived, a single helper taking (df, out_path) would remove the duplication.
# NOTE(review): `titlefont=` inside `yaxis=dict(...)` is a deprecated spelling in newer
# plotly releases; `title=dict(text=..., font=dict(size=32))` is the current form —
# confirm against the installed plotly version before re-enabling.
# NOTE(review): the ser70_sub trace uses legendgroup "S70 to Carbonyl Oxygen (Å)" but
# name "S70 to Carbonyl O (Å)"; this has no visible effect while showlegend=False.
# NOTE(review): write_image presumably needs the pics/ directory to exist and a
# static-image backend (e.g. kaleido) to be installed — verify.
# df = cefo_dfs["PNCA_4C6Y"]
# fig = go.Figure()

# fig.add_trace(
#     go.Violin(
#               y=df["min_wat_glu"],
#               line_color="mediumpurple",
#               legendgroup="Deprotonation Distance (Å)", name="Deprotonation Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["wat_sub"],
#               line_color="#1167b1",
#               legendgroup="Water Attack Distance (Å)", name="Water Attack Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["lys73_sub"],
#               line_color="lightseagreen",
#               legendgroup="Lys73 to TI (Å)", name="Lys73 to TI (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["asn132_lys73"],
#               line_color="red",
#               legendgroup="Asn132 to Lys73 (Å)", name="Asn132 to Lys73 (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser130_sub"],
#               line_color="grey",
#               legendgroup="Ser130 to TI (Å)", name="Ser130 to TI (Å)",
#               ),
# )

# # new traces 
# fig.add_trace(
#     go.Violin(
#               y=df["a237_sub"],
#               line_color="darkgreen",
#               legendgroup="A237 to Carbonyl O (Å)", name="A237 to Carbonyl O (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser70_sub"],
#               line_color="orange",
#               legendgroup="S70 to Carbonyl Oxygen (Å)", name="S70 to Carbonyl O (Å)",
#               ),
# )

# # Update characteristics shared by all traces (mean line drawn inside each violin).
# fig.update_traces(meanline_visible=True,
#                 #   points="all", # show all points
#                 #   jitter=0.40,  # add some jitter on points for better visibility
# )
# fig.update_layout(
#     template="plotly_white",
#     yaxis= dict(title="Distance (Å)", titlefont=dict(size=32)),
#     margin=dict(l=20, r=20, t=20, b=20),
#     showlegend=False,
#     font_family="Arial",
#     width=1000,
#     height=600,
#     yaxis_range=[1,10]

# )

# # Draw a mirrored black frame with outside ticks on both axes.
# fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=16))
# fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=22))
# fig.show("svg") # remove "svg" to make the figure interactive.

# pio.write_image(fig, r"pics/cefo_pnca.png", scale=6)

In [27]:
# # cefo, GNCA
#
# NOTE: every line of this cell is commented out, so executing the cell does nothing.
# If the leading "# " markers were removed, the cell would:
#   1. take the cefotaxime distance table for GNCA_4B88 from `cefo_dfs`,
#   2. add one violin trace per distance column, in this order:
#      min_wat_glu, wat_sub, lys73_sub, asn132_lys73, ser130_sub, a237_sub, ser70_sub,
#   3. apply a shared layout (plotly_white template, 1000x600, legend hidden,
#      y axis fixed to the range 1-10 and titled "Distance (Å)"),
#   4. render the figure as SVG and write it to pics/cefo_gnca.png at scale=6.
#
# NOTE(review): the neighbouring cells repeat this layout verbatim for the other
# proteins, changing only the `cefo_dfs` key and the output filename. If these plots
# are revived, a single helper taking (df, out_path) would remove the duplication.
# NOTE(review): `titlefont=` inside `yaxis=dict(...)` is a deprecated spelling in newer
# plotly releases; `title=dict(text=..., font=dict(size=32))` is the current form —
# confirm against the installed plotly version before re-enabling.
# NOTE(review): the ser70_sub trace uses legendgroup "S70 to Carbonyl Oxygen (Å)" but
# name "S70 to Carbonyl O (Å)"; this has no visible effect while showlegend=False.
# NOTE(review): write_image presumably needs the pics/ directory to exist and a
# static-image backend (e.g. kaleido) to be installed — verify.
# df = cefo_dfs["GNCA_4B88"]
# fig = go.Figure()

# fig.add_trace(
#     go.Violin(
#               y=df["min_wat_glu"],
#               line_color="mediumpurple",
#               legendgroup="Deprotonation Distance (Å)", name="Deprotonation Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["wat_sub"],
#               line_color="#1167b1",
#               legendgroup="Water Attack Distance (Å)", name="Water Attack Distance (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["lys73_sub"],
#               line_color="lightseagreen",
#               legendgroup="Lys73 to TI (Å)", name="Lys73 to TI (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["asn132_lys73"],
#               line_color="red",
#               legendgroup="Asn132 to Lys73 (Å)", name="Asn132 to Lys73 (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser130_sub"],
#               line_color="grey",
#               legendgroup="Ser130 to TI (Å)", name="Ser130 to TI (Å)",
#               ),
# )

# # new traces 
# fig.add_trace(
#     go.Violin(
#               y=df["a237_sub"],
#               line_color="darkgreen",
#               legendgroup="A237 to Carbonyl O (Å)", name="A237 to Carbonyl O (Å)",
#               ),
# )

# fig.add_trace(
#     go.Violin(
#               y=df["ser70_sub"],
#               line_color="orange",
#               legendgroup="S70 to Carbonyl Oxygen (Å)", name="S70 to Carbonyl O (Å)",
#               ),
# )

# # Update characteristics shared by all traces (mean line drawn inside each violin).
# fig.update_traces(meanline_visible=True,
#                 #   points="all", # show all points
#                 #   jitter=0.40,  # add some jitter on points for better visibility
# )
# fig.update_layout(
#     template="plotly_white",
#     yaxis= dict(title="Distance (Å)", titlefont=dict(size=32)),
#     margin=dict(l=20, r=20, t=20, b=20),
#     showlegend=False,
#     font_family="Arial",
#     width=1000,
#     height=600,
#     yaxis_range=[1,10]

# )

# # Draw a mirrored black frame with outside ticks on both axes.
# fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=16))
# fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True,
#                  ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
#                  tickfont=dict(color='black', size=22))
# fig.show("svg") # remove "svg" to make the figure interactive.

# pio.write_image(fig, r"pics/cefo_gnca.png", scale=6)