# Tool page to interrogate large numbers of Boltz-2 protein-ligand predictions

This notebook is only useful if the predictions have been run as set up by the "Generate_YAML" notebook.

Intended use:
Be able to filter Boltz-2 predictions by:
- Prediction confidence_score
- Proximity of average ligand position to C-alpha atoms (Up to three positions)
- Number of hydrogen bonds between ligand and receptor
- Prediction of ligand affinity

It then exports the results as mmCIF files aligned all-against-one, together with a combined CSV containing all data.

In [None]:
# Stuff in import
import json
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from filtering_functions import *


In [None]:
# Threshold applied to the boltz-2 confidence value when filtering results
confidence_value = 0.7

# Entries of the working directory, which is where the boltz-2 output folders are looked for
folder_list = os.listdir(os.curdir)



In [None]:
# Combined Boltz-2 Tools script library
#
# Steps performed by this cell:
#   1. Parse every 'boltz_results_*' folder into a per-folder '<folder>_results.csv'.
#   2. Combine the per-folder CSVs into 'boltz_results_combined.csv'.
#   3. Keep the rows whose confidence_score exceeds `confidence_value` and
#      save them as 'boltz_results_filtered.csv'.
#   4. Copy the model files of the filtered rows into 'filtered_models'.
#   5. Delete the per-folder CSVs.
#   6. Copy every model file of the combined table into 'combined_models'.


def _copy_model_file(source_path, destination_dir):
    """Copy one file into ``destination_dir`` and report the outcome.

    ``shutil.copy`` is used instead of a shell ``cp`` command string so that
    paths containing spaces or shell metacharacters are copied correctly and
    a failed copy is reported instead of being announced as a success.

    Args:
        source_path: Path of the file to copy.
        destination_dir: Existing directory that receives the copy.

    Returns:
        True when the file was copied, False when the copy failed.
    """
    try:
        shutil.copy(source_path, destination_dir)
    except OSError as error:
        # Best effort: one failing file must not abort the whole run.
        print(f"Failed to copy {source_path} to {destination_dir}: {error}")
        return False
    print(f"Copied {source_path} to {destination_dir}")
    return True


print(f"Found folders: {folder_list}")  # Debugging statement

for folder in folder_list:
    if not folder.startswith('boltz_results_'):
        print(f"Skipping folder: {folder} (does not start with 'boltz_results_')")
        continue

    # NOTE(review): only the third '_'-separated token is used as the
    # prediction name, so a name that itself contains '_' is truncated here.
    # Confirm against the folder layout produced by the "Generate_YAML" notebook.
    predictions_folder = os.path.join(folder, 'predictions/', folder.split('_')[2])
    if not os.path.exists(predictions_folder):
        print(f"Predictions folder does not exist: {predictions_folder}")
        continue

    print(f"working on folder: {predictions_folder}")
    print(f"Processing folder: {predictions_folder}")  # Debugging statement
    results_df = parse_boltz2_results(os.path.join('./', predictions_folder))
    # Per-folder CSV; these are combined below and then deleted again.
    results_df.to_csv(f"{folder}_results.csv", index=False)

# Combine all the CSV files into a single DataFrame

file_list = [f for f in os.listdir('./') if f.endswith('_results.csv')]

combined_df = combine_csv_results(file_list)
combined_df.to_csv('boltz_results_combined.csv', index=False)
print("Combined results saved to 'boltz_results_combined.csv'")  # Debugging statement

# Filtering the DataFrame to include only rows with a confidence score greater than confidence_value
filtered_df = combined_df[combined_df['confidence_score'] > confidence_value]
filtered_df.to_csv('boltz_results_filtered.csv', index=False)
print("Filtered results saved to 'boltz_results_filtered.csv'")  # Debugging statement

# Using the filtered DataFrame to copy the corresponding model files into a new directory
output_dir = 'filtered_models'
os.makedirs(output_dir, exist_ok=True)
for model_path in filtered_df['model_path']:
    if os.path.exists(model_path):
        _copy_model_file(model_path, output_dir)
    else:
        print(f"Model file does not exist: {model_path}")  # Debugging statement

# remove the temporary .csv files
for file in file_list:
    if os.path.exists(file):
        os.remove(file)
        print(f"Removed temporary file: {file}")  # Debugging statement
    else:
        print(f"File does not exist: {file}")  # Debugging statement

# Copy all the model files from the combined results to a new directory
combined_output_dir = 'combined_models'
os.makedirs(combined_output_dir, exist_ok=True)
for file in combined_df['model_path'].unique():
    if os.path.exists(file):
        _copy_model_file(file, combined_output_dir)
    else:
        # The original also printed an undefined name here, which raised
        # NameError as soon as one model file was missing.
        print(f"File does not exist: {file}")  # Debugging statement


In [None]:
# Create ZIP archive of the important results for sharing
# Base name of the archive; shutil.make_archive appends the '.zip' extension
archive_name = f'boltz_results_archive_{confidence_value}'

# Staging directory whose contents become the archive
temp_dir = 'temp_archive'

# CSV files to include in the archive.
# NOTE(review): the two 'top_confidence_results_*' files are not written by
# any cell visible here; they are skipped with a message when absent instead
# of aborting the whole archive step.
csv_files = [
    'boltz_results_combined.csv',
    'boltz_results_filtered.csv',
    'top_confidence_results_all_models.csv',
    'top_confidence_results_with_pharm_data.csv',
]

# Start from an empty staging directory so leftovers from an earlier,
# interrupted run do not end up in the new archive.
shutil.rmtree(temp_dir, ignore_errors=True)
os.makedirs(temp_dir, exist_ok=True)
try:
    # Copy the .csv files to the staging directory
    for csv_file in csv_files:
        if os.path.exists(csv_file):
            shutil.copy(csv_file, temp_dir)
        else:
            print(f"Skipping missing file: {csv_file}")

    # Copy the filtered_models directory to the staging directory
    filtered_models_path = os.path.join(temp_dir, 'filtered_models')
    shutil.copytree('filtered_models', filtered_models_path, dirs_exist_ok=True)

    # Copy the combined_models directory to the staging directory
    combined_models_path = os.path.join(temp_dir, 'combined_models')
    shutil.copytree('combined_models', combined_models_path, dirs_exist_ok=True)

    # Create the archive from the staged contents
    shutil.make_archive(archive_name, 'zip', temp_dir)
finally:
    # Always remove the staging directory, even when a copy step failed
    shutil.rmtree(temp_dir, ignore_errors=True)


print("Results Filtered and Collated comrade")