In [1]:
# Install a headless OpenJDK 8 runtime if it is not already available in your Colab environment.
# Java is needed later to run the PaDEL-Descriptor JAR.
# NOTE: stdout and stderr are both redirected to /dev/null, so a failed install is silent here.
!apt-get install -y openjdk-8-jdk-headless > /dev/null 2>&1


In [None]:
# @title Import Libraries
# Import necessary libraries
import pandas as pd
import numpy as np
from PIL import Image
import subprocess
import os
import base64
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from google.colab import files
!pip install gdown

In [None]:
# @title Install Required Files {"display-mode":"form"}
# Clone the required repository from GitHub
!git clone https://github.com/saiflab/AmyPic.git

# Specify the file ID and name for downloading the model file from Google Drive
file_id = "162s4ZorHK4oQnktlyQ_K2qoFvWOW9pKO"
file_name = "APP_Model.pkl"

# Download the model file using gdown
import gdown
gdown.download(f"https://drive.google.com/uc?id={file_id}", file_name, quiet=False)

# Download additional Padel resources from GitHub
!wget https://github.com/saiflab/Code/raw/main/padel.zip
!wget https://github.com/saiflab/Code/raw/main/padel.sh

# Extract the contents of the downloaded Padel archive
!unzip padel.zip

In [None]:
# @title Molecular descriptor calculator
# Print the notebook banner: what is being predicted, which fingerprints and
# model type are used, and which tool computes the descriptors.
# The text is written in Markdown syntax, but print() emits it as plain text.
print("""
# Amyloid-Beta Precursor Protein pIC50 value Identification Tools
### Amyloid-Beta Precursor Protein
This notebook allows you to predict the bioactivity towards inhibiting the Human Amyloid-Beta Precursor Protein. APP is a drug target for Alzheimer's disease.

#### Some Important Information:
- Fingerprints used: PubChem Fingerprints
- Model Used: Random Forest Regression
- Descriptor calculated using [PaDEL-Descriptor](http://www.yapcwsoft.com/dd/padeldescriptor/)
""")

import os
import subprocess
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

# Descriptor Calculation Function
def desc_calc(input_file="molecule.smi", output_file="descriptors_output.csv"):
    """
    Calculate PubChem fingerprints for a structure file using PaDEL-Descriptor.

    Parameters
    ----------
    input_file : str
        Path of the structure file (e.g. a ``.smi`` file) handed to
        PaDEL-Descriptor through ``-dir``. Only this file is processed; other
        files in the working directory are ignored.
    output_file : str
        Path of the CSV file PaDEL-Descriptor writes (``-file``).

    Returns
    -------
    bool
        True when PaDEL-Descriptor exits with status 0. False when the JAR or
        the input file is missing, Java cannot be started, or the process
        exits with a non-zero status; the reason is printed.
    """
    jar_path = "./PaDEL-Descriptor/PaDEL-Descriptor.jar"  # Ensure the correct path
    if not os.path.exists(jar_path):
        print(f"Error: PaDEL-Descriptor JAR file not found at {jar_path}.")
        return False

    if not os.path.exists(input_file):
        print(f"Error: Input file not found at {input_file}.")
        return False

    # Argument list (no shell): paths containing spaces or shell metacharacters
    # are passed to Java verbatim instead of being re-parsed by a shell.
    command = [
        "java", "-Xms2G", "-Xmx2G", "-Djava.awt.headless=true",
        "-jar", jar_path,
        "-removesalt", "-standardizenitro", "-fingerprints",
        # Keep output rows in input order; later cells pair predictions with
        # molecule names by position.
        "-retainorder",
        "-descriptortypes", "./PaDEL-Descriptor/PubchemFingerprinter.xml",
        "-dir", input_file,
        "-file", output_file,
    ]

    print("Running PaDEL-Descriptor...")
    try:
        process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as exc:
        # Raised when the `java` executable itself cannot be launched
        print(f"Error occurred during descriptor calculation: {exc}")
        return False

    if process.returncode != 0:
        error_text = process.stderr.decode('utf-8', errors='replace')
        print(f"Error occurred during descriptor calculation: {error_text}")
        return False

    print("Descriptor calculation completed successfully.")
    return True

# Load and Process Input File
def load_input_file(uploaded_file):
    """
    Load the uploaded file containing SMILES notations and rewrite it as
    ``molecule.smi``.

    Parameters
    ----------
    uploaded_file : dict
        Mapping whose keys are the names of files on disk (the notebook passes
        the result of ``files.upload()``). Only the first key is processed.

    Returns
    -------
    pandas.DataFrame
        The file parsed as a headerless, space-separated table. The same
        table is written tab-separated to ``molecule.smi``.

    Raises
    ------
    ValueError
        If ``uploaded_file`` contains no files.
    """
    print("Loading input file...")
    filenames = list(uploaded_file.keys())
    if not filenames:
        # Fail here with a clear message instead of returning None and
        # crashing later when the result is indexed.
        raise ValueError(
            "No input file was uploaded; please upload a .txt file containing SMILES notations."
        )
    if len(filenames) > 1:
        print(f"Warning: {len(filenames)} files were uploaded; only '{filenames[0]}' will be processed.")

    filename = filenames[0]
    load_data = pd.read_table(filename, sep=' ', header=None)
    load_data.to_csv('molecule.smi', sep='\t', header=False, index=False)
    print("\n**Original Input Data**")
    print(load_data)
    return load_data

# Ask the user for the SMILES file via the Colab upload widget
from google.colab import files

print("Please upload your input .txt file (containing SMILES notations):")
uploaded_file = files.upload()

# Parse the upload (this also writes molecule.smi)
input_data = load_input_file(uploaded_file)

# Compute the descriptors and report the outcome
if desc_calc(input_file="molecule.smi", output_file="descriptors_output.csv"):
    print("Descriptor calculation successful.")
else:
    print("Descriptor calculation failed. Please check the logs for errors.")




In [None]:
# @title Load File & Make Predictions
# Molecule names: the column labelled 1 of input_data, renamed for the results table
molecule_name = input_data[1].rename('molecule_name')
# Model Prediction Function
def build_model(input_data, molecule_names=None, model_path="/content/APP_Model.pkl"):
    """
    Load the pre-trained model, predict pIC50 values, then save and plot them.

    Parameters
    ----------
    input_data : table-like
        Feature matrix passed unchanged to ``model.predict`` (the notebook
        passes a pandas DataFrame of descriptors).
    molecule_names : sequence or pandas.Series, optional
        One label per row of ``input_data``, in the same order. When omitted,
        the notebook-level variable ``molecule_name`` is used.
    model_path : str, optional
        Location of the pickled model.

    Returns
    -------
    None
        The results table is printed, written to ``molecule_predictions.csv``
        and shown as a bar plot. If the model file is missing, an error is
        printed and the function returns early.
    """
    if not os.path.exists(model_path):
        print(f"Error: Model file not found at {model_path}.")
        return

    # NOTE: unpickling can execute arbitrary code stored in the file;
    # only load model files obtained from a trusted source.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    # Predict using the model
    print("Making predictions...")
    predictions = model.predict(input_data)
    prediction_output = pd.Series(predictions, name='pIC50')

    # Resolve the labels, then rebuild them with a fresh 0..n-1 index so that
    # concat pairs names and predictions by position rather than index label.
    names = molecule_name if molecule_names is None else molecule_names
    names = pd.Series(list(names), name='molecule_name')
    if len(names) != len(prediction_output):
        print(
            f"Warning: {len(names)} molecule names but {len(prediction_output)} predictions; "
            "names and predictions may be misaligned."
        )

    # Combine molecule names and predictions
    df = pd.concat([names, prediction_output], axis=1)

    # Display predictions
    print("\n**Prediction Output**")
    print(df)

    # Save predictions
    df.to_csv('molecule_predictions.csv', index=False)
    print("Predictions saved to 'molecule_predictions.csv'.")

    # Plot the predictions
    plt.figure(figsize=(8, 6))
    sns.barplot(y=df['molecule_name'], x=df['pIC50'], errwidth=0)
    plt.xlabel("pIC50 Values", size=12, fontstyle='normal', weight=600)
    plt.ylabel("Molecule Name", size=12, fontstyle='normal', weight=600)
    plt.title("pIC50 Value of Various Molecules", fontstyle='normal', weight=600)
    plt.show()

# Load Descriptors and Predict
# Each prerequisite is checked first so a missing file or column produces a
# readable "Error: ..." message instead of a traceback.
desc_path = 'descriptors_output.csv'
# Descriptor list used in the pre-trained model (its column headers are the feature names)
Xlist_path = "/content/AmyPic/descriptor_list.csv"

if not os.path.exists(desc_path):
    print(f"Error: Descriptor file not found at {desc_path}. Run the descriptor calculation cell first.")
else:
    print("Loading calculated descriptors...")
    desc = pd.read_csv(desc_path)
    print("\n**Calculated Molecular Descriptors**")
    print(desc)
    print("Shape:", desc.shape)

    if os.path.exists(Xlist_path):
        Xlist = list(pd.read_csv(Xlist_path).columns)
        # Columns the model expects but the descriptor file does not contain
        missing = [col for col in Xlist if col not in desc.columns]
        if missing:
            print(
                f"Error: {len(missing)} descriptor(s) required by the model are missing "
                f"from {desc_path}, e.g. {missing[:5]}."
            )
        else:
            desc_subset = desc[Xlist]
            print("\n**Subset of Descriptors from Previously Built Model**")
            print(desc_subset)
            print("Shape:", desc_subset.shape)

            # Apply the trained model to make predictions
            build_model(desc_subset)
    else:
        print(f"Error: Descriptor list file not found at {Xlist_path}.")

In [None]:
# @title Download the Results
# Path of the predictions CSV to download
# (assumes the prediction cell ran with /content as the working directory)
file_path = "/content/molecule_predictions.csv"

# Download the file; if the prediction step did not produce it, report that
# instead of raising from files.download
if os.path.exists(file_path):
    files.download(file_path)
else:
    print(f"Error: Prediction file not found at {file_path}. Run the prediction cell first.")