In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display


### Import ChemPLP Docking Results

In [None]:
# Load the data into a DataFrame, skipping the first 5 lines.
# sep=r"\s+" splits on runs of whitespace; it replaces delim_whitespace=True,
# which pandas has deprecated.
df_ChemPLP = pd.read_csv(
    "./bestranking_ChemPLP.lst",
    skiprows=5,
    sep=r"\s+",
    index_col=False,
    quotechar="'",
)

# Drop the two columns with NaN values
df_ChemPLP = df_ChemPLP.drop(columns=["Ligand", "name.1"])

# Correct file name: strip a leading ".\" (\x5C is the backslash character)
df_ChemPLP['File'] = df_ChemPLP['File'].str.replace(r'^\.\x5C', '', regex=True)

### Import ChemScore/GoldScore/ASP Rescoring Results

In [None]:
# Pre-treat rescore log files
def treat_rescore_logs(df_name, rescore_file):
    """Load one rescoring log file into a cleaned DataFrame.

    The file is read as whitespace-delimited text after skipping its first
    two lines. The ``Ligand`` and ``name.1`` columns are dropped, and each
    ``name`` is reduced to the text that starts at its first ``Z`` and ends
    just before the next ``|``.

    Parameters
    ----------
    df_name : str
        Not used. Kept only so existing calls keep working; the frame is
        returned to the caller rather than bound to this name.
    rescore_file : str or path-like
        Path of the rescoring log to read.

    Returns
    -------
    pandas.DataFrame
        The cleaned table. A ``name`` that does not contain the
        ``Z...|`` pattern becomes NaN.
    """
    # sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True
    rescore_df = pd.read_csv(rescore_file, skiprows=2, sep=r"\s+", index_col=False)
    rescore_df = rescore_df.drop(columns=["Ligand", "name.1"])
    # expand=False yields a Series (one capture group), which is what a column assignment needs
    rescore_df["name"] = rescore_df["name"].str.extract(r"(Z.*?)\|", expand=False)
    return rescore_df


# One cleaned frame per rescoring function. The df_name argument is overwritten
# inside treat_rescore_logs, so only rescore_file affects the result.
df_ChemScore = treat_rescore_logs(
    df_name="df_ChemScore", rescore_file="rescore_ChemScore.log"
)
df_GoldScore = treat_rescore_logs(
    df_name="df_GoldScore", rescore_file="rescore_GoldScore.log"
)
df_ASP = treat_rescore_logs(
    df_name="df_ASP", rescore_file="rescore_ASP.log"
)


In [None]:
# Remove the ChemPLP columns that are not needed in the combined table.
# Reassigning (instead of dropping in place) and ignoring columns that are
# already gone lets this cell be re-run without raising a KeyError.
df_ChemPLP = df_ChemPLP.drop(
    columns=['S(PLP)', 'S(hbond)', 'S(cho)', 'S(metal)', 'DE(clash)',
             'DE(tors)', 'intcor', 'time', 'File'],
    errors="ignore",
)

# Merge df_ChemPLP and df_ChemScore. Both have a "Score" column, so the
# suffixes turn them into Score_ChemPLP and Score_ChemScore.
# merge() defaults to an inner join: a ligand missing from either table is dropped.
merged_df = df_ChemPLP.merge(df_ChemScore[["name", "Score"]], on="name", suffixes=("_ChemPLP", "_ChemScore"))

# Merge df_GoldScore (its score is taken from the "Fitness" column)
df_GoldScore = df_GoldScore.rename(columns={"Fitness": "Score_GoldScore"})
merged_df = merged_df.merge(df_GoldScore[["name", "Score_GoldScore"]], on="name")

# Merge df_ASP
df_ASP = df_ASP.rename(columns={"Score": "Score_ASP"})
merged_df = merged_df.merge(df_ASP[["name", "Score_ASP"]], on="name")

# Reorder the columns (this also discards any column not listed here)
merged_df = merged_df[['name', 'Score_ChemPLP', 'Score_ChemScore', 'Score_GoldScore', 'Score_ASP']]

### Import Vina and Vinardo Rescoring Results

In [None]:
# Each results file is read as colon-separated (name, score) pairs,
# skipping its first two lines and supplying the column names here.
df_vina = pd.read_csv(
    "./results_sorted_vina_scoring.txt",
    sep=":",
    skiprows=2,
    names=["name", "Score_VINA"],
)
df_vinardo = pd.read_csv(
    "./results_sorted_vinardo_scoring.txt",
    sep=":",
    skiprows=2,
    names=["name", "Score_VINARDO"],
)

# Append both score columns to the main dataframe, matching rows on "name"
merged_df = (
    merged_df
    .merge(df_vina[["name", "Score_VINA"]], on="name")
    .merge(df_vinardo[["name", "Score_VINARDO"]], on="name")
)

In [None]:
def sort_dataframe(column, ascending):
    """Display ``merged_df`` sorted by ``column``.

    ``ascending`` is passed straight to ``DataFrame.sort_values``. The global
    ``merged_df`` is read at call time and is not modified.
    """
    display(merged_df.sort_values(by=column, ascending=ascending))

# Controls: the column to sort on, and the sort direction (ascending when ticked)
column_dropdown = widgets.Dropdown(
    description='Sort by:',
    options=merged_df.columns,
)
ascending_checkbox = widgets.Checkbox(
    description='Ascending',
    value=True,
)

# Bind the two controls to sort_dataframe's arguments; its output is captured in `out`
out = widgets.interactive_output(
    sort_dataframe,
    {'column': column_dropdown, 'ascending': ascending_checkbox},
)

# Show the controls side by side, followed by the sorted table
display(widgets.HBox([column_dropdown, ascending_checkbox]), out)

In [None]:
# Per-score cut-offs: the first four scores must exceed their threshold,
# the Vina and Vinardo scores must fall below theirs.
condition_PLP = merged_df['Score_ChemPLP'].gt(60)
condition_CScore = merged_df['Score_ChemScore'].gt(25)
condition_GScore = merged_df['Score_GoldScore'].gt(40)
condition_ASP = merged_df['Score_ASP'].gt(30)
condition_Vina = merged_df['Score_VINA'].lt(-7)
condition_Vinardo = merged_df['Score_VINARDO'].lt(-5)
# Add more conditions as needed

# A row is kept only if it satisfies every condition (& is AND; use | for OR)
filtered_df = merged_df.loc[
    condition_PLP
    & condition_CScore
    & condition_GScore
    & condition_ASP
    & condition_Vina
    & condition_Vinardo
]

# Names of the ligands that passed all filters; the index labels of
# merged_df are kept as-is (not reset).
matching_names = filtered_df['name']

print(matching_names)


In [None]:
# Show the score rows of three specific ligands, selected by name
merged_df[merged_df['name'].isin(['Z1171328906', 'Z1842122377', 'Z1143443933'])]

In [None]:
# Display the combined score table
merged_df

In [None]:
# Remove failed docking results / wrong-signed values so the KDE plot is easier
# to read (e.g. 4.5 as a Vina score, or -50 from one of the Gold functions).
# The rows are removed from merged_df in place.

# Scores where a negative value marks the row for removal
for score_column in ('Score_GoldScore', 'Score_ChemScore', 'Score_ASP', 'Score_ChemPLP'):
    negative_rows = merged_df.loc[merged_df[score_column] < 0].index
    merged_df.drop(negative_rows, inplace=True)

# Scores where a positive value marks the row for removal
for score_column in ('Score_VINA', 'Score_VINARDO'):
    positive_rows = merged_df.loc[merged_df[score_column] > 0].index
    merged_df.drop(positive_rows, inplace=True)

In [None]:
# Score columns to draw, and the legend entry used for each (same order)
columns_to_plot = ['Score_ChemPLP', 'Score_ChemScore', 'Score_GoldScore', 'Score_ASP', 'Score_VINA', 'Score_VINARDO']
legend_labels = ['ChemPLP', 'ChemScore', 'GoldScore', 'ASP', 'Vina', 'Vinardo']

fig, ax = plt.subplots(figsize=(12, 8))
# Fix the density axis range before any curve is drawn
ax.set_ylim((0.0, 0.5))

# One filled KDE curve per score column, all on the same axes
for column, legend_label in zip(columns_to_plot, legend_labels):
    sns.kdeplot(data=merged_df, x=column, fill=True, label=legend_label, ax=ax)

ax.set_xlabel("Docking Score", fontsize=14)
ax.set_ylabel("Density", fontsize=14)
ax.legend(prop={'size': 14})

plt.show()

In [None]:
# Number of rows currently in merged_df
len(merged_df)