In [1]:
import argparse
from pathlib import Path
import numpy as np
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole


In [2]:
import pandas as pd
from tqdm import tqdm
import prolif as plf
import MDAnalysis as mda
from MDAnalysis.topology.guessers import guess_types
from pathlib import Path

In [3]:
# Register tqdm's pandas integration (progress bars for pandas operations)
tqdm.pandas()

# Ask RDKit's IPython console integration to use SVG output
IPythonConsole.ipython_useSVG = True

In [4]:
# Load the processed interaction table and report its (rows, columns) shape
processed_csv = 'processed.csv'
final_results_df = pd.read_csv(processed_csv)
final_results_df.shape

(34107, 13)

In [5]:
# Preview the first five rows of the loaded table
final_results_df.head(n=5)

Unnamed: 0,Residue,Hydrophobic.distance,VdWContact.distance,HBDonor.distance,HBDonor.DHA_angle,Cationic.distance,HBAcceptor.distance,HBAcceptor.DHA_angle,PiStacking.distance,PiStacking.plane_angle,PiStacking.normal_to_centroid_angle,PiStacking.intersect_distance,PDB_File
0,VAL60.A,4.475821,,,,,,,,,,,0
1,ASP61.A,,3.103711,,,,,,,,,,0
2,PHE64.A,4.00642,,,,,,,,,,,0
3,PHE64.A,,3.348581,,,,,,,,,,0
4,PHE64.A,4.00642,,,,,,,,,,,0


In [11]:
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import interact, widgets


### Viewing the distribution for a single PDB file (tmp_0.pdb)

In [9]:
def plot_distributions(df, pdb_files, columns):
    """Show an interactive, overlaid histogram of one column per PDB file.

    Registers an ipywidgets ``interact`` control over ``columns``. Each time a
    column is selected, a Plotly figure is built with one semi-transparent
    histogram trace per entry of ``pdb_files`` and then displayed.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a ``'PDB_File'`` column plus every column listed in
        ``columns``.
    pdb_files : iterable
        Values of ``df['PDB_File']`` to plot; one histogram trace is added
        per value.
    columns : list
        Column names offered by the interactive control.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    @interact(column=columns)
    def make_plot(column):
        fig = go.Figure()

        for pdb_file in pdb_files:
            # Keep only the rows that belong to this PDB file
            filtered_df = df[df['PDB_File'] == pdb_file]
            fig.add_trace(go.Histogram(
                x=filtered_df[column],
                # Trace names are strings in Plotly; identifiers in this
                # table are integers (possibly NumPy integers, which the
                # validator may reject), so convert explicitly.
                name=str(pdb_file),
                opacity=0.75
            ))

        fig.update_layout(
            title=f'Distribution of {column}',
            xaxis_title=column,
            yaxis_title='Count',
            # Draw traces on top of each other rather than stacked/grouped
            barmode='overlay'
        )

        fig.show()

# Every column except the PDB identifier is offered in the dropdown
columns = list(final_results_df.columns)
columns.remove('PDB_File')

# Restrict this first look to a single PDB file
pdb_files = [0]

# Render the interactive histogram
plot_distributions(df=final_results_df, pdb_files=pdb_files, columns=columns)

interactive(children=(Dropdown(description='column', options=('Residue', 'Hydrophobic.distance', 'VdWContact.d…

##### Select a column from the dropdown in the output above to view other distances/metrics

In [14]:
def plot_mean_with_confidence_intervals(df, numeric_columns, z=1.0):
    """Interactively plot the per-PDB-file mean of a column with an SEM band.

    Registers an ipywidgets ``interact`` control over ``numeric_columns``.
    For the selected column the rows of ``df`` are grouped by ``'PDB_File'``
    and the group mean is drawn as a line, surrounded by a shaded band of
    ``mean ± z * SEM`` (SEM = standard error of the mean of each group).

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a ``'PDB_File'`` column plus every column listed in
        ``numeric_columns``.
    numeric_columns : list
        Column names offered by the interactive control.
    z : float, optional
        Multiplier applied to the SEM to obtain the band half-width. The
        default of 1.0 reproduces the plain ``mean ± SEM`` band; a value such
        as 1.96 gives an approximate 95% interval under a normal
        approximation.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    @interact(column=numeric_columns)
    def make_plot(column):
        grouped = df.groupby('PDB_File')[column]
        means = grouped.mean()
        # Band half-width; pandas reports NaN SEM for groups with fewer than
        # two values, which leaves a gap in the band at those PDB files.
        half_width = z * grouped.sem()
        upper = means + half_width
        lower = means - half_width

        fig = go.Figure()

        # Mean line
        fig.add_trace(go.Scatter(
            x=means.index,
            y=means.values,
            mode='lines+markers',
            name='Mean',
            line=dict(color='blue')
        ))

        # Upper edge of the band (hidden from the legend; the lower edge
        # carries the legend entry for the whole band)
        fig.add_trace(go.Scatter(
            x=means.index,
            y=upper.values,
            fill=None,
            mode='lines',
            line=dict(color='lightblue'),
            showlegend=False
        ))

        # Lower edge of the band
        fig.add_trace(go.Scatter(
            x=means.index,
            y=lower.values,
            fill='tonexty',  # Fill the area between this trace and the previous one
            mode='lines',
            line=dict(color='lightblue'),
            # State exactly what the band shows instead of an unqualified
            # "Confidence Interval"
            name=f'Mean ± {z:g} SEM'
        ))

        fig.update_layout(
            title=f'Mean and Confidence Interval of {column} across PDB Files',
            xaxis_title='PDB File',
            yaxis_title=column
        )

        fig.show()

# Offer only numeric measurement columns (identifier columns are excluded)
excluded_columns = ('PDB_File', 'Residue')
is_numeric = pd.api.types.is_numeric_dtype
numeric_columns = [
    name
    for name in final_results_df.columns
    if name not in excluded_columns and is_numeric(final_results_df[name])
]

# Render the interactive mean / band plot
plot_mean_with_confidence_intervals(df=final_results_df, numeric_columns=numeric_columns)

interactive(children=(Dropdown(description='column', options=('Hydrophobic.distance', 'VdWContact.distance', '…